author		Alex Waterman <alexw@nvidia.com>	2017-10-20 13:26:22 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-11-10 18:47:01 -0500
commit		01c98eb68055f0b18d4f5b9dc4aa78601a00bc1e (patch)
tree		535c341ede5f52165c074a860f8e4c81247e34c5 /drivers
parent		8428c82c816f361ce7bbb1fe4804f350b8cbea2f (diff)
gpu: nvgpu: VM map path refactoring
Final VM mapping refactoring. Move most of the logic in the VM map path to
the common/mm/vm.c code and use the generic APIs previously implemented to
deal with comptags and map caching.

This also updates the mapped_buffer struct to finally be free of the Linux
dma_buf and scatter gather table pointers. These are replaced with the
nvgpu_os_buffer struct.

JIRA NVGPU-30
JIRA NVGPU-71
JIRA NVGPU-224

Change-Id: If5b32886221c3e5af2f3d7ddd4fa51dd487bb981
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583987
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
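The core of the refactor is the OS-abstraction boundary: common code only sees
struct nvgpu_os_buffer and the os_priv member of struct nvgpu_mapped_buf, and
each OS defines what those actually contain (the header in this patch leaves a
"QNX include goes here" placeholder for the non-Linux case). As a rough
illustration of what such a port would have to supply -- only the type names
nvgpu_os_buffer, nvgpu_mapped_buf_priv and the nvgpu_os_buf_get_size() hook
come from this patch; everything prefixed qnx_ is hypothetical:

	/*
	 * Illustrative sketch only -- not part of this patch. A non-Linux OS
	 * layer would provide its own versions of the two OS-specific types
	 * plus the size hook used by the common nvgpu_vm_map() path. The
	 * qnx_* names below are hypothetical.
	 */
	struct nvgpu_os_buffer {
		/* Whatever handle the OS uses for an importable buffer. */
		struct qnx_shm_handle *handle;		/* hypothetical */
	};

	struct nvgpu_mapped_buf_priv {
		/* OS-private state kept for the lifetime of the mapping. */
		struct qnx_shm_handle *handle;		/* hypothetical */
		struct qnx_paddr_list *pages;		/* hypothetical */
	};

	/* Common code sizes the mapping from the OS buffer via this hook. */
	u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
	{
		return qnx_shm_handle_size(os_buf->handle);	/* hypothetical */
	}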
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.c       |   2
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c        | 301
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c           | 244
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/vm.h |   5
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/vm.h       |  53
5 files changed, 357 insertions, 248 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
index 003da143..8e847206 100644
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -64,7 +64,7 @@ static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
64 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 64 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
65 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); 65 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
66 if (buffer) 66 if (buffer)
67 addr = nvgpu_mem_get_addr_sgl(g, buffer->sgt->sgl); 67 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
68 nvgpu_mutex_release(&vm->update_gmmu_lock); 68 nvgpu_mutex_release(&vm->update_gmmu_lock);
69 69
70 return addr; 70 return addr;
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 9178a0b0..d6d86c94 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -49,8 +49,8 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
49 struct nvgpu_mapped_buf *mapped_buffer = 49 struct nvgpu_mapped_buf *mapped_buffer =
50 mapped_buffer_from_rbtree_node(node); 50 mapped_buffer_from_rbtree_node(node);
51 51
52 if (mapped_buffer->dmabuf == dmabuf && 52 if (mapped_buffer->os_priv.dmabuf == dmabuf &&
53 kind == mapped_buffer->kind) 53 mapped_buffer->kind == kind)
54 return mapped_buffer; 54 return mapped_buffer;
55 55
56 nvgpu_rbtree_enum_next(&node, node); 56 nvgpu_rbtree_enum_next(&node, node);
@@ -75,7 +75,7 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
75 return -EINVAL; 75 return -EINVAL;
76 } 76 }
77 77
78 *dmabuf = mapped_buffer->dmabuf; 78 *dmabuf = mapped_buffer->os_priv.dmabuf;
79 *offset = gpu_va - mapped_buffer->addr; 79 *offset = gpu_va - mapped_buffer->addr;
80 80
81 nvgpu_mutex_release(&vm->update_gmmu_lock); 81 nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -83,66 +83,68 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
83 return 0; 83 return 0;
84} 84}
85 85
86u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
87{
88 return os_buf->dmabuf->size;
89}
90
86/* 91/*
87 * vm->update_gmmu_lock must be held. This checks to see if we already have 92 * vm->update_gmmu_lock must be held. This checks to see if we already have
88 * mapped the passed buffer into this VM. If so, just return the existing 93 * mapped the passed buffer into this VM. If so, just return the existing
89 * mapping address. 94 * mapping address.
90 */ 95 */
91static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm, 96struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
92 struct dma_buf *dmabuf, 97 struct nvgpu_os_buffer *os_buf,
93 u64 offset_align, 98 u64 map_addr,
94 u32 flags, 99 u32 flags,
95 int kind, 100 int kind)
96 int rw_flag)
97{ 101{
98 struct gk20a *g = gk20a_from_vm(vm); 102 struct gk20a *g = gk20a_from_vm(vm);
99 struct nvgpu_mapped_buf *mapped_buffer = NULL; 103 struct nvgpu_mapped_buf *mapped_buffer = NULL;
100 104
101 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { 105 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
102 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align); 106 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
103 if (!mapped_buffer) 107 if (!mapped_buffer)
104 return 0; 108 return NULL;
105 109
106 if (mapped_buffer->dmabuf != dmabuf || 110 if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
107 mapped_buffer->kind != (u32)kind) 111 mapped_buffer->kind != (u32)kind)
108 return 0; 112 return NULL;
109 } else { 113 } else {
110 mapped_buffer = 114 mapped_buffer =
111 __nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind); 115 __nvgpu_vm_find_mapped_buf_reverse(vm,
116 os_buf->dmabuf,
117 kind);
112 if (!mapped_buffer) 118 if (!mapped_buffer)
113 return 0; 119 return NULL;
114 } 120 }
115 121
116 if (mapped_buffer->flags != flags) 122 if (mapped_buffer->flags != flags)
117 return 0; 123 return NULL;
118 124
119 /* 125 /*
120 * If we find the mapping here then that means we have mapped it already 126 * If we find the mapping here then that means we have mapped it already
121 * and already have a dma_buf ref to the underlying buffer. As such 127 * and the prior pin and get must be undone.
122 * release the ref taken earlier in the map path.
123 */ 128 */
124 dma_buf_put(mapped_buffer->dmabuf); 129 gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, mapped_buffer->os_priv.sgt);
125 130 dma_buf_put(os_buf->dmabuf);
126 nvgpu_ref_get(&mapped_buffer->ref);
127 131
128 nvgpu_log(g, gpu_dbg_map, 132 nvgpu_log(g, gpu_dbg_map,
129 "gv: 0x%04x_%08x + 0x%-7zu " 133 "gv: 0x%04x_%08x + 0x%-7zu "
130 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " 134 "[dma: 0x%010llx, pa: 0x%010llx] "
131 "pgsz=%-3dKb as=%-2d ctags=%d start=%d " 135 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
132 "flags=0x%x apt=%s (reused)", 136 "flags=0x%x apt=%s (reused)",
133 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), 137 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
134 dmabuf->size, 138 os_buf->dmabuf->size,
135 u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), 139 (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
136 u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), 140 (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
137 u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
138 u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
139 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, 141 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
140 vm_aspace_id(vm), 142 vm_aspace_id(vm),
141 mapped_buffer->ctag_lines, mapped_buffer->ctag_offset, 143 mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
142 mapped_buffer->flags, 144 mapped_buffer->flags,
143 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf))); 145 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
144 146
145 return mapped_buffer->addr; 147 return mapped_buffer;
146} 148}
147 149
148int nvgpu_vm_map_linux(struct vm_gk20a *vm, 150int nvgpu_vm_map_linux(struct vm_gk20a *vm,
@@ -159,237 +161,62 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
159{ 161{
160 struct gk20a *g = gk20a_from_vm(vm); 162 struct gk20a *g = gk20a_from_vm(vm);
161 struct device *dev = dev_from_gk20a(g); 163 struct device *dev = dev_from_gk20a(g);
162 struct nvgpu_ctag_buffer_info binfo = { 0 }; 164 struct nvgpu_os_buffer os_buf = { dmabuf, dev };
163 struct gk20a_comptags comptags;
164 struct nvgpu_vm_area *vm_area = NULL;
165 struct nvgpu_sgt *nvgpu_sgt = NULL;
166 struct sg_table *sgt; 165 struct sg_table *sgt;
166 struct nvgpu_sgt *nvgpu_sgt = NULL;
167 struct nvgpu_mapped_buf *mapped_buffer = NULL; 167 struct nvgpu_mapped_buf *mapped_buffer = NULL;
168 struct nvgpu_os_buffer os_buf = { dmabuf, dev }; 168 u64 map_addr = 0ULL;
169 enum nvgpu_aperture aperture;
170 bool va_allocated = false;
171 bool clear_ctags = false;
172 u64 map_offset = 0;
173 u64 align;
174 u32 ctag_offset;
175 int err = 0; 169 int err = 0;
176 170
177 /* 171 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
178 * The kind used as part of the key for map caching. HW may 172 map_addr = offset_align;
179 * actually be programmed with the fallback kind in case the
180 * key kind is compressible but we're out of comptags.
181 */
182 s16 map_key_kind;
183
184 binfo.flags = flags;
185 binfo.size = dmabuf->size;
186 binfo.compr_kind = compr_kind;
187 binfo.incompr_kind = incompr_kind;
188
189 if (compr_kind != NV_KIND_INVALID)
190 map_key_kind = compr_kind;
191 else
192 map_key_kind = incompr_kind;
193
194 if (map_key_kind == NV_KIND_INVALID) {
195 nvgpu_err(g, "Valid kind must be supplied");
196 return -EINVAL;
197 }
198
199 if (vm->userspace_managed &&
200 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
201 nvgpu_err(g, "non-fixed-offset mapping not available on "
202 "userspace managed address spaces");
203 return -EFAULT;
204 }
205
206 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
207
208 /* check if this buffer is already mapped */
209 if (!vm->userspace_managed) {
210 map_offset = __nvgpu_vm_find_mapping(
211 vm, dmabuf, offset_align,
212 flags, map_key_kind, rw_flag);
213 if (map_offset) {
214 nvgpu_mutex_release(&vm->update_gmmu_lock);
215 *gpu_va = map_offset;
216 return 0;
217 }
218 }
219 173
220 sgt = gk20a_mm_pin(dev, dmabuf); 174 sgt = gk20a_mm_pin(dev, dmabuf);
221 if (IS_ERR(sgt)) { 175 if (IS_ERR(sgt)) {
222 err = PTR_ERR(sgt); 176 nvgpu_warn(g, "Failed to pin dma_buf!");
223 nvgpu_warn(g, "oom allocating tracking buffer"); 177 return PTR_ERR(sgt);
224 goto clean_up;
225 }
226
227 nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
228 if (!nvgpu_sgt)
229 goto clean_up;
230
231 aperture = gk20a_dmabuf_aperture(g, dmabuf);
232 if (aperture == APERTURE_INVALID) {
233 err = -EINVAL;
234 goto clean_up;
235 } 178 }
236 179
237 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) 180 if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
238 map_offset = offset_align;
239
240 align = nvgpu_sgt_alignment(g, nvgpu_sgt);
241 if (g->mm.disable_bigpage)
242 binfo.pgsz_idx = gmmu_page_size_small;
243 else
244 binfo.pgsz_idx = __get_pte_size(vm, map_offset,
245 min_t(u64, binfo.size, align));
246 mapping_size = mapping_size ? mapping_size : binfo.size;
247 mapping_size = ALIGN(mapping_size, SZ_4K);
248
249 if ((mapping_size > binfo.size) ||
250 (buffer_offset > (binfo.size - mapping_size))) {
251 err = -EINVAL; 181 err = -EINVAL;
252 goto clean_up; 182 goto clean_up;
253 } 183 }
254 184
255 /* Check if we should use a fixed offset for mapping this buffer */ 185 nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
256 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { 186 if (!nvgpu_sgt) {
257 err = nvgpu_vm_area_validate_buffer(vm, 187 err = -ENOMEM;
258 offset_align,
259 mapping_size,
260 binfo.pgsz_idx,
261 &vm_area);
262 if (err)
263 goto clean_up;
264
265 map_offset = offset_align;
266 va_allocated = false;
267 } else {
268 va_allocated = true;
269 }
270
271 err = nvgpu_vm_compute_compression(vm, &binfo);
272 if (err) {
273 nvgpu_err(g, "failure setting up compression");
274 goto clean_up; 188 goto clean_up;
275 } 189 }
276 190
277 /* bar1 and pmu vm don't need ctag */ 191 mapped_buffer = nvgpu_vm_map(vm,
278 if (!vm->enable_ctag) 192 &os_buf,
279 binfo.ctag_lines = 0; 193 nvgpu_sgt,
280 194 map_addr,
281 gk20a_get_comptags(&os_buf, &comptags); 195 mapping_size,
282 196 buffer_offset,
283 if (binfo.ctag_lines && !comptags.lines) { 197 rw_flag,
284 /* allocate compression resources if needed */ 198 flags,
285 err = gk20a_alloc_comptags(g, &os_buf, 199 compr_kind,
286 &g->gr.comp_tags, 200 incompr_kind,
287 binfo.ctag_lines); 201 batch,
288 if (err) { 202 gk20a_dmabuf_aperture(g, dmabuf));
289 /* TBD: we can partially alloc ctags as well... */
290
291 /* prevent compression ... */
292 binfo.compr_kind = NV_KIND_INVALID;
293
294 /* ... and make sure we have the fallback */
295 if (binfo.incompr_kind == NV_KIND_INVALID) {
296 nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
297 goto clean_up;
298 }
299 } else {
300 gk20a_get_comptags(&os_buf, &comptags);
301
302 if (g->ops.ltc.cbc_ctrl)
303 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
304 comptags.offset,
305 comptags.offset +
306 comptags.allocated_lines - 1);
307 else
308 clear_ctags = true;
309 }
310 }
311
312 /*
313 * Calculate comptag index for this mapping. Differs in
314 * case of partial mapping.
315 */
316 ctag_offset = comptags.offset;
317 if (ctag_offset)
318 ctag_offset += buffer_offset >>
319 ilog2(g->ops.fb.compression_page_size(g));
320
321 /* update gmmu ptes */
322 map_offset = g->ops.mm.gmmu_map(vm,
323 map_offset,
324 nvgpu_sgt,
325 buffer_offset, /* sg offset */
326 mapping_size,
327 binfo.pgsz_idx,
328 (binfo.compr_kind != NV_KIND_INVALID ?
329 binfo.compr_kind : binfo.incompr_kind),
330 ctag_offset,
331 flags, rw_flag,
332 clear_ctags,
333 false,
334 false,
335 batch,
336 aperture);
337 if (!map_offset)
338 goto clean_up;
339 203
340 nvgpu_sgt_free(g, nvgpu_sgt); 204 nvgpu_sgt_free(g, nvgpu_sgt);
341 205
342 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); 206 if (IS_ERR(mapped_buffer)) {
343 if (!mapped_buffer) { 207 err = PTR_ERR(mapped_buffer);
344 nvgpu_warn(g, "oom allocating tracking buffer");
345 goto clean_up;
346 }
347 mapped_buffer->dmabuf = dmabuf;
348 mapped_buffer->sgt = sgt;
349 mapped_buffer->addr = map_offset;
350 mapped_buffer->size = mapping_size;
351 mapped_buffer->pgsz_idx = binfo.pgsz_idx;
352 mapped_buffer->ctag_offset = ctag_offset;
353 mapped_buffer->ctag_lines = binfo.ctag_lines;
354 mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
355 mapped_buffer->vm = vm;
356 mapped_buffer->flags = flags;
357 mapped_buffer->kind = map_key_kind;
358 mapped_buffer->va_allocated = va_allocated;
359 nvgpu_init_list_node(&mapped_buffer->buffer_list);
360 nvgpu_ref_init(&mapped_buffer->ref);
361
362 err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
363 if (err) {
364 nvgpu_err(g, "failed to insert into mapped buffer tree");
365 goto clean_up; 208 goto clean_up;
366 } 209 }
367 210
368 vm->num_user_mapped_buffers++; 211 mapped_buffer->os_priv.dmabuf = dmabuf;
212 mapped_buffer->os_priv.sgt = sgt;
369 213
370 if (vm_area) { 214 *gpu_va = mapped_buffer->addr;
371 nvgpu_list_add_tail(&mapped_buffer->buffer_list,
372 &vm_area->buffer_list_head);
373 mapped_buffer->vm_area = vm_area;
374 }
375
376 nvgpu_mutex_release(&vm->update_gmmu_lock);
377
378 *gpu_va = map_offset;
379 return 0; 215 return 0;
380 216
381clean_up: 217clean_up:
382 nvgpu_kfree(g, mapped_buffer); 218 gk20a_mm_unpin(dev, dmabuf, sgt);
383 219
384 if (nvgpu_sgt)
385 nvgpu_sgt_free(g, nvgpu_sgt);
386 if (va_allocated)
387 __nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
388 if (!IS_ERR(sgt))
389 gk20a_mm_unpin(dev, dmabuf, sgt);
390
391 nvgpu_mutex_release(&vm->update_gmmu_lock);
392 nvgpu_log_info(g, "err=%d", err);
393 return err; 220 return err;
394} 221}
395 222
@@ -407,8 +234,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
407 struct dma_buf *dmabuf; 234 struct dma_buf *dmabuf;
408 u64 ret_va; 235 u64 ret_va;
409 236
410 gk20a_dbg_fn("");
411
412 /* get ref to the mem handle (released on unmap_locked) */ 237 /* get ref to the mem handle (released on unmap_locked) */
413 dmabuf = dma_buf_get(dmabuf_fd); 238 dmabuf = dma_buf_get(dmabuf_fd);
414 if (IS_ERR(dmabuf)) { 239 if (IS_ERR(dmabuf)) {
@@ -465,8 +290,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
465{ 290{
466 struct vm_gk20a *vm = mapped_buffer->vm; 291 struct vm_gk20a *vm = mapped_buffer->vm;
467 292
468 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf, 293 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
469 mapped_buffer->sgt); 294 mapped_buffer->os_priv.sgt);
470 295
471 dma_buf_put(mapped_buffer->dmabuf); 296 dma_buf_put(mapped_buffer->os_priv.dmabuf);
472} 297}
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 46783e4e..97c6d4ca 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -20,6 +20,7 @@
20 * DEALINGS IN THE SOFTWARE. 20 * DEALINGS IN THE SOFTWARE.
21 */ 21 */
22 22
23#include <nvgpu/bug.h>
23#include <nvgpu/log.h> 24#include <nvgpu/log.h>
24#include <nvgpu/dma.h> 25#include <nvgpu/dma.h>
25#include <nvgpu/vm.h> 26#include <nvgpu/vm.h>
@@ -712,6 +713,249 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
712 nvgpu_big_free(vm->mm->g, mapped_buffers); 713 nvgpu_big_free(vm->mm->g, mapped_buffers);
713} 714}
714 715
716struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
717 struct nvgpu_os_buffer *os_buf,
718 struct nvgpu_sgt *sgt,
719 u64 map_addr,
720 u64 map_size,
721 u64 phys_offset,
722 int rw,
723 u32 flags,
724 s16 compr_kind,
725 s16 incompr_kind,
726 struct vm_gk20a_mapping_batch *batch,
727 enum nvgpu_aperture aperture)
728{
729 struct gk20a *g = gk20a_from_vm(vm);
730 struct nvgpu_mapped_buf *mapped_buffer = NULL;
731 struct nvgpu_ctag_buffer_info binfo = { 0 };
732 struct gk20a_comptags comptags;
733 struct nvgpu_vm_area *vm_area = NULL;
734 int err = 0;
735 u64 align;
736 u32 ctag_offset;
737 bool clear_ctags = false;
738 bool va_allocated = true;
739
740 /*
741 * The kind used as part of the key for map caching. HW may
742 * actually be programmed with the fallback kind in case the
743 * key kind is compressible but we're out of comptags.
744 */
745 s16 map_key_kind;
746
747 if (vm->userspace_managed &&
748 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
749 nvgpu_err(g,
750 "non-fixed-offset mapping not available on "
751 "userspace managed address spaces");
752 return ERR_PTR(-EINVAL);
753 }
754
755 binfo.flags = flags;
756 binfo.size = nvgpu_os_buf_get_size(os_buf);
757 binfo.compr_kind = compr_kind;
758 binfo.incompr_kind = incompr_kind;
759
760 if (compr_kind != NV_KIND_INVALID)
761 map_key_kind = compr_kind;
762 else
763 map_key_kind = incompr_kind;
764
765 /*
766 * Check if this buffer is already mapped.
767 */
768 if (!vm->userspace_managed) {
769 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
770 mapped_buffer = nvgpu_vm_find_mapping(vm,
771 os_buf,
772 map_addr,
773 flags,
774 map_key_kind);
775 nvgpu_mutex_release(&vm->update_gmmu_lock);
776
777 if (mapped_buffer) {
778 nvgpu_ref_get(&mapped_buffer->ref);
779 return mapped_buffer;
780 }
781 }
782
783 /*
784 * Generate a new mapping!
785 */
786 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
787 if (!mapped_buffer) {
788 nvgpu_warn(g, "oom allocating tracking buffer");
789 return ERR_PTR(-ENOMEM);
790 }
791
792 align = nvgpu_sgt_alignment(g, sgt);
793 if (g->mm.disable_bigpage)
794 binfo.pgsz_idx = gmmu_page_size_small;
795 else
796 binfo.pgsz_idx = __get_pte_size(vm, map_addr,
797 min_t(u64, binfo.size, align));
798 map_size = map_size ? map_size : binfo.size;
799 map_size = ALIGN(map_size, SZ_4K);
800
801 if ((map_size > binfo.size) ||
802 (phys_offset > (binfo.size - map_size))) {
803 err = -EINVAL;
804 goto clean_up_nolock;
805 }
806
807 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
808
809 /*
810 * Check if we should use a fixed offset for mapping this buffer.
811 */
812 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
813 err = nvgpu_vm_area_validate_buffer(vm,
814 map_addr,
815 map_size,
816 binfo.pgsz_idx,
817 &vm_area);
818 if (err)
819 goto clean_up;
820
821 va_allocated = false;
822 }
823
824 err = nvgpu_vm_compute_compression(vm, &binfo);
825 if (err) {
826 nvgpu_err(g, "failure setting up compression");
827 goto clean_up;
828 }
829
830 /*
831 * bar1 and pmu VMs don't need ctags.
832 */
833 if (!vm->enable_ctag)
834 binfo.ctag_lines = 0;
835
836 gk20a_get_comptags(os_buf, &comptags);
837
838 if (binfo.ctag_lines && !comptags.lines) {
839 /*
840 * Allocate compression resources if needed.
841 */
842 if (gk20a_alloc_comptags(g,
843 os_buf,
844 &g->gr.comp_tags,
845 binfo.ctag_lines)) {
846
847 /*
848 * Prevent compression...
849 */
850 binfo.compr_kind = NV_KIND_INVALID;
851
852 /*
853 * ... And make sure we have a fallback.
854 */
855 if (binfo.incompr_kind == NV_KIND_INVALID) {
856 nvgpu_err(g, "comptag alloc failed and no "
857 "fallback kind specified");
858 err = -ENOMEM;
859
860 /*
861 * Any alloced comptags are cleaned up when the
862 * dmabuf is freed.
863 */
864 goto clean_up;
865 }
866 } else {
867 gk20a_get_comptags(os_buf, &comptags);
868
869 if (g->ops.ltc.cbc_ctrl)
870 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
871 comptags.offset,
872 comptags.offset +
873 comptags.allocated_lines - 1);
874 else
875 clear_ctags = true;
876 }
877 }
878
879 /*
880 * Calculate comptag index for this mapping. Differs in case of partial
881 * mapping.
882 */
883 ctag_offset = comptags.offset;
884 if (ctag_offset)
885 ctag_offset += phys_offset >>
886 ilog2(g->ops.fb.compression_page_size(g));
887
888 map_addr = g->ops.mm.gmmu_map(vm,
889 map_addr,
890 sgt,
891 phys_offset,
892 map_size,
893 binfo.pgsz_idx,
894 binfo.compr_kind != NV_KIND_INVALID ?
895 binfo.compr_kind : binfo.incompr_kind,
896 ctag_offset,
897 flags,
898 rw,
899 clear_ctags,
900 false,
901 false,
902 batch,
903 aperture);
904 if (!map_addr) {
905 err = -ENOMEM;
906 goto clean_up;
907 }
908
909 nvgpu_init_list_node(&mapped_buffer->buffer_list);
910 nvgpu_ref_init(&mapped_buffer->ref);
911 mapped_buffer->addr = map_addr;
912 mapped_buffer->size = map_size;
913 mapped_buffer->pgsz_idx = binfo.pgsz_idx;
914 mapped_buffer->ctag_offset = ctag_offset;
915 mapped_buffer->ctag_lines = binfo.ctag_lines;
916 mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
917 mapped_buffer->vm = vm;
918 mapped_buffer->flags = flags;
919 mapped_buffer->kind = map_key_kind;
920 mapped_buffer->va_allocated = va_allocated;
921 mapped_buffer->vm_area = vm_area;
922
923 err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
924 if (err) {
925 nvgpu_err(g, "failed to insert into mapped buffer tree");
926 goto clean_up;
927 }
928
929 vm->num_user_mapped_buffers++;
930
931 if (vm_area) {
932 nvgpu_list_add_tail(&mapped_buffer->buffer_list,
933 &vm_area->buffer_list_head);
934 mapped_buffer->vm_area = vm_area;
935 }
936
937 nvgpu_mutex_release(&vm->update_gmmu_lock);
938
939 return mapped_buffer;
940
941clean_up:
942 if (mapped_buffer->addr)
943 g->ops.mm.gmmu_unmap(vm,
944 mapped_buffer->addr,
945 mapped_buffer->size,
946 mapped_buffer->pgsz_idx,
947 mapped_buffer->va_allocated,
948 gk20a_mem_flag_none,
949 mapped_buffer->vm_area ?
950 mapped_buffer->vm_area->sparse : false,
951 NULL);
952 nvgpu_mutex_release(&vm->update_gmmu_lock);
953clean_up_nolock:
954 nvgpu_kfree(g, mapped_buffer);
955
956 return ERR_PTR(err);
957}
958
715/* 959/*
716 * Really unmap. This does the real GMMU unmap and removes the mapping from the 960 * Really unmap. This does the real GMMU unmap and removes the mapping from the
717 * VM map tracking tree (and vm_area list if necessary). 961 * VM map tracking tree (and vm_area list if necessary).
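Taken together, the new common entry point follows a simple contract: the OS
layer pins its buffer, wraps it in an nvgpu_sgt plus an nvgpu_os_buffer, and
hands everything to nvgpu_vm_map(). On success it gets back a referenced
struct nvgpu_mapped_buf (a map-cache hit takes an extra ref on the existing
mapping); on failure it gets an ERR_PTR() and must undo its own pinning. A
condensed sketch of the Linux caller above, with the pin/SGT error paths
elided for brevity:

	/* Sketch only -- condensed from nvgpu_vm_map_linux() in this patch. */
	struct nvgpu_os_buffer os_buf = { dmabuf, dev };
	struct sg_table *sgt = gk20a_mm_pin(dev, dmabuf);
	struct nvgpu_sgt *nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
	struct nvgpu_mapped_buf *mapped_buffer;

	mapped_buffer = nvgpu_vm_map(vm, &os_buf, nvgpu_sgt,
				     map_addr, mapping_size, buffer_offset,
				     rw_flag, flags, compr_kind, incompr_kind,
				     batch, gk20a_dmabuf_aperture(g, dmabuf));

	nvgpu_sgt_free(g, nvgpu_sgt);

	if (IS_ERR(mapped_buffer)) {
		/* Common code unwound its side; unpin ours. */
		gk20a_mm_unpin(dev, dmabuf, sgt);
		return PTR_ERR(mapped_buffer);
	}

	/* OS-private handles live alongside the common tracking struct. */
	mapped_buffer->os_priv.dmabuf = dmabuf;
	mapped_buffer->os_priv.sgt = sgt;
	*gpu_va = mapped_buffer->addr;
	return 0;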
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 39deeb69..d9f082af 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -44,6 +44,11 @@ struct nvgpu_os_buffer {
44 struct device *dev; 44 struct device *dev;
45}; 45};
46 46
47struct nvgpu_mapped_buf_priv {
48 struct dma_buf *dmabuf;
49 struct sg_table *sgt;
50};
51
47/* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */ 52/* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
48int nvgpu_vm_map_linux(struct vm_gk20a *vm, 53int nvgpu_vm_map_linux(struct vm_gk20a *vm,
49 struct dma_buf *dmabuf, 54 struct dma_buf *dmabuf,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index b91b41e4..d501b98f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -37,6 +37,18 @@ struct vm_gk20a;
37struct nvgpu_vm_area; 37struct nvgpu_vm_area;
38struct gk20a_comptag_allocator; 38struct gk20a_comptag_allocator;
39 39
40/*
41 * Defined by each OS. Allows the common VM code do things to the OS specific
42 * buffer structures.
43 */
44struct nvgpu_os_buffer;
45
46#ifdef __KERNEL__
47#include <nvgpu/linux/vm.h>
48#else
49/* QNX include goes here. */
50#endif
51
40/** 52/**
41 * This header contains the OS agnostic APIs for dealing with VMs. Most of the 53 * This header contains the OS agnostic APIs for dealing with VMs. Most of the
42 * VM implementation is system specific - it must translate from a platform's 54 * VM implementation is system specific - it must translate from a platform's
@@ -89,13 +101,12 @@ struct nvgpu_mapped_buf {
89 struct vm_gk20a *vm; 101 struct vm_gk20a *vm;
90 struct nvgpu_vm_area *vm_area; 102 struct nvgpu_vm_area *vm_area;
91 103
104 struct nvgpu_ref ref;
105
92 struct nvgpu_rbtree_node node; 106 struct nvgpu_rbtree_node node;
93 struct nvgpu_list_node buffer_list; 107 struct nvgpu_list_node buffer_list;
94 u64 addr; 108 u64 addr;
95 u64 size; 109 u64 size;
96 struct dma_buf *dmabuf;
97 struct sg_table *sgt;
98 struct nvgpu_ref ref;
99 110
100 u32 pgsz_idx; 111 u32 pgsz_idx;
101 u32 ctag_offset; 112 u32 ctag_offset;
@@ -105,13 +116,16 @@ struct nvgpu_mapped_buf {
105 u32 flags; 116 u32 flags;
106 u32 kind; 117 u32 kind;
107 bool va_allocated; 118 bool va_allocated;
108};
109 119
110/* 120 /*
111 * Defined by each OS. Allows the common VM code do things to the OS specific 121 * Separate from the nvgpu_os_buffer struct to clearly distinguish
112 * buffer structures. 122 * lifetime. A nvgpu_mapped_buf_priv will _always_ be wrapped by a
113 */ 123 * struct nvgpu_mapped_buf; however, there are times when a struct
114struct nvgpu_os_buffer; 124 * nvgpu_os_buffer would be separate. This aims to prevent dangerous
125 * usage of container_of() or the like in OS code.
126 */
127 struct nvgpu_mapped_buf_priv os_priv;
128};
115 129
116static inline struct nvgpu_mapped_buf * 130static inline struct nvgpu_mapped_buf *
117nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node) 131nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node)
@@ -226,6 +240,25 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
226 struct nvgpu_mapped_buf **mapped_buffers, 240 struct nvgpu_mapped_buf **mapped_buffers,
227 int num_buffers); 241 int num_buffers);
228 242
243struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
244 struct nvgpu_os_buffer *os_buf,
245 u64 map_addr,
246 u32 flags,
247 int kind);
248
249struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
250 struct nvgpu_os_buffer *os_buf,
251 struct nvgpu_sgt *sgt,
252 u64 map_addr,
253 u64 map_size,
254 u64 phys_offset,
255 int rw,
256 u32 flags,
257 s16 compr_kind,
258 s16 incompr_kind,
259 struct vm_gk20a_mapping_batch *batch,
260 enum nvgpu_aperture aperture);
261
229void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, 262void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
230 struct vm_gk20a_mapping_batch *batch); 263 struct vm_gk20a_mapping_batch *batch);
231 264
@@ -240,6 +273,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer);
240 */ 273 */
241void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref); 274void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref);
242 275
276u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf);
277
243/* 278/*
244 * These all require the VM update lock to be held. 279 * These all require the VM update lock to be held.
245 */ 280 */