author      Alex Waterman <alexw@nvidia.com>                        2017-04-24 18:26:00 -0400
committer   mobile promotions <svcmobile_promotions@nvidia.com>     2017-05-19 18:34:06 -0400
commit      014ace5a85f274de7debb4c6168d69c803445e19 (patch)
tree        4028be3294b95e38659f1ebba4a14457748e59f1 /drivers/gpu
parent      d37e8f7dcf190f31f9c0c12583db2bb0c0d313c0 (diff)
gpu: nvgpu: Split VM implementation out
This patch begins splitting the VM implementation out of mm_gk20a.c and moves it to common/linux/vm.c and common/mm/vm.c. This split is necessary because the VM code has two portions: first, an interface for the OS-specific code to use (i.e., userspace mappings), and second, a set of APIs for the driver to use (init, cleanup, etc.) which are not OS specific.

This is only the beginning of the split - there are still a lot of things that need to be carefully moved around.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I3b57cba245d7daf9e4326a143b9c6217e0f28c96
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477743
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/Makefile.nvgpu               1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c          421
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm_priv.h      36
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c              65
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c            2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c        4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h        2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c        4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c           622
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h            52
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c             2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/vm.h          78
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c               2
13 files changed, 661 insertions, 630 deletions
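
The OS-facing entry points described in the commit message land in common/linux/vm.c as nvgpu_vm_map() and nvgpu_vm_unmap(). As orientation before the diff, a hypothetical caller modeled on the cde_gk20a.c hunk further down might look like this; the kind value and the failure-path dma_buf_put() are illustrative assumptions, not part of this patch:

static u64 example_map_dmabuf(struct vm_gk20a *vm, struct dma_buf *dmabuf)
{
	u64 gpu_va;

	get_dma_buf(dmabuf);	/* a ref for nvgpu_vm_map(), as in cde_gk20a.c */

	gpu_va = nvgpu_vm_map(vm, dmabuf,
			      0,		/* offset_align: let the VM pick the VA */
			      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
			      0,		/* kind: illustrative value */
			      true,		/* user_mapped */
			      gk20a_mem_flag_none,
			      0,		/* buffer_offset */
			      0,		/* mapping_size: 0 maps the whole buffer */
			      NULL);		/* no mapping batch */
	if (!gpu_va)
		dma_buf_put(dmabuf);	/* assumed caller-side cleanup on failure */

	return gpu_va;
}

/* ...and later, when the mapping is no longer needed: */
/*	nvgpu_vm_unmap(vm, gpu_va); */
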
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 0a60eece..2f9d1b36 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -39,6 +39,7 @@ nvgpu-y := \
39 common/linux/driver_common.o \ 39 common/linux/driver_common.o \
40 common/linux/firmware.o \ 40 common/linux/firmware.o \
41 common/linux/thread.o \ 41 common/linux/thread.o \
42 common/linux/vm.o \
42 common/mm/nvgpu_allocator.o \ 43 common/mm/nvgpu_allocator.o \
43 common/mm/bitmap_allocator.o \ 44 common/mm/bitmap_allocator.o \
44 common/mm/buddy_allocator.o \ 45 common/mm/buddy_allocator.o \
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/dma-buf.h>
18#include <linux/scatterlist.h>
19
20#include <nvgpu/log.h>
21#include <nvgpu/lock.h>
22#include <nvgpu/rbtree.h>
23#include <nvgpu/page_allocator.h>
24
25#include "gk20a/gk20a.h"
26#include "gk20a/mm_gk20a.h"
27
28#include "vm_priv.h"
29
30static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
31 struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
32{
33 struct nvgpu_rbtree_node *node = NULL;
34 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
35
36 nvgpu_rbtree_enum_start(0, &node, root);
37
38 while (node) {
39 struct nvgpu_mapped_buf *mapped_buffer =
40 mapped_buffer_from_rbtree_node(node);
41
42 if (mapped_buffer->dmabuf == dmabuf &&
43 kind == mapped_buffer->kind)
44 return mapped_buffer;
45
46 nvgpu_rbtree_enum_next(&node, node);
47 }
48
49 return NULL;
50}
51
52/*
53 * Determine alignment for a passed buffer. Necessary since the buffer may
54 * appear big to map with large pages but the SGL may have chunks that are not
55 * aligned on a 64/128kB large page boundary.
56 */
57static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
58 enum nvgpu_aperture aperture)
59{
60 u64 align = 0, chunk_align = 0;
61 u64 buf_addr;
62
63 if (aperture == APERTURE_VIDMEM) {
64 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
65 struct page_alloc_chunk *chunk = NULL;
66
67 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
68 page_alloc_chunk, list_entry) {
69 chunk_align = 1ULL << __ffs(chunk->base |
70 chunk->length);
71
72 if (align)
73 align = min(align, chunk_align);
74 else
75 align = chunk_align;
76 }
77
78 return align;
79 }
80
81 buf_addr = (u64)sg_dma_address(sgl);
82
83 if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
84 while (sgl) {
85 buf_addr = (u64)sg_phys(sgl);
86 chunk_align = 1ULL << __ffs(buf_addr |
87 (u64)sgl->length);
88
89 if (align)
90 align = min(align, chunk_align);
91 else
92 align = chunk_align;
93 sgl = sg_next(sgl);
94 }
95
96 return align;
97 }
98
99 align = 1ULL << __ffs(buf_addr);
100
101 return align;
102}
103
104/*
105 * vm->update_gmmu_lock must be held. This checks to see if we already have
106 * mapped the passed buffer into this VM. If so, just return the existing
107 * mapping address.
108 */
109static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
110 struct dma_buf *dmabuf,
111 u64 offset_align,
112 u32 flags,
113 int kind,
114 bool user_mapped,
115 int rw_flag)
116{
117 struct gk20a *g = gk20a_from_vm(vm);
118 struct nvgpu_mapped_buf *mapped_buffer = NULL;
119
120 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
121 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
122 if (!mapped_buffer)
123 return 0;
124
125 if (mapped_buffer->dmabuf != dmabuf ||
126 mapped_buffer->kind != (u32)kind)
127 return 0;
128 } else {
129 mapped_buffer =
130 __nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
131 if (!mapped_buffer)
132 return 0;
133 }
134
135 if (mapped_buffer->flags != flags)
136 return 0;
137
138 /* mark the buffer as used */
139 if (user_mapped) {
140 if (mapped_buffer->user_mapped == 0)
141 vm->num_user_mapped_buffers++;
142 mapped_buffer->user_mapped++;
143
144 /* If the mapping comes from user space, we own
145 * the handle ref. Since we reuse an
146 * existing mapping here, we need to give back those
147 * refs once in order not to leak.
148 */
149 if (mapped_buffer->own_mem_ref)
150 dma_buf_put(mapped_buffer->dmabuf);
151 else
152 mapped_buffer->own_mem_ref = true;
153 }
154 kref_get(&mapped_buffer->ref);
155
156 nvgpu_log(g, gpu_dbg_map,
157 "gv: 0x%04x_%08x + 0x%-7zu "
158 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
159 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
160 "flags=0x%x apt=%s (reused)",
161 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
162 dmabuf->size,
163 u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
164 u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
165 u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
166 u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
167 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
168 vm_aspace_id(vm),
169 mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
170 mapped_buffer->flags,
171 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
172
173 return mapped_buffer->addr;
174}
175
176u64 nvgpu_vm_map(struct vm_gk20a *vm,
177 struct dma_buf *dmabuf,
178 u64 offset_align,
179 u32 flags,
180 int kind,
181 bool user_mapped,
182 int rw_flag,
183 u64 buffer_offset,
184 u64 mapping_size,
185 struct vm_gk20a_mapping_batch *batch)
186{
187 struct gk20a *g = gk20a_from_vm(vm);
188 struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
189 struct nvgpu_mapped_buf *mapped_buffer = NULL;
190 bool inserted = false, va_allocated = false;
191 u64 map_offset = 0;
192 int err = 0;
193 struct buffer_attrs bfr = {NULL};
194 struct gk20a_comptags comptags;
195 bool clear_ctags = false;
196 struct scatterlist *sgl;
197 u64 ctag_map_win_size = 0;
198 u32 ctag_map_win_ctagline = 0;
199 struct vm_reserved_va_node *va_node = NULL;
200 u32 ctag_offset;
201 enum nvgpu_aperture aperture;
202
203 if (user_mapped && vm->userspace_managed &&
204 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
205 nvgpu_err(g, "non-fixed-offset mapping not available on "
206 "userspace managed address spaces");
207 return -EFAULT;
208 }
209
210 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
211
212 /* check if this buffer is already mapped */
213 if (!vm->userspace_managed) {
214 map_offset = __nvgpu_vm_find_mapping(
215 vm, dmabuf, offset_align,
216 flags, kind,
217 user_mapped, rw_flag);
218 if (map_offset) {
219 nvgpu_mutex_release(&vm->update_gmmu_lock);
220 return map_offset;
221 }
222 }
223
224 /* pin buffer to get phys/iovmm addr */
225 bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
226 if (IS_ERR(bfr.sgt)) {
227 /* Falling back to physical is actually possible
228 * here in many cases if we use 4K phys pages in the
229 * gmmu. However we have some regions which require
230 * contig regions to work properly (either phys-contig
231 * or contig through smmu io_vaspace). Until we can
232 * track the difference between those two cases we have
233 * to fail the mapping when we run out of SMMU space.
234 */
235 nvgpu_warn(g, "oom allocating tracking buffer");
236 goto clean_up;
237 }
238
239 bfr.kind_v = kind;
240 bfr.size = dmabuf->size;
241 sgl = bfr.sgt->sgl;
242
243 aperture = gk20a_dmabuf_aperture(g, dmabuf);
244 if (aperture == APERTURE_INVALID) {
245 err = -EINVAL;
246 goto clean_up;
247 }
248
249 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
250 map_offset = offset_align;
251
252 bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
253 bfr.pgsz_idx = __get_pte_size(vm, map_offset,
254 min_t(u64, bfr.size, bfr.align));
255 mapping_size = mapping_size ? mapping_size : bfr.size;
256
257 /* Check if we should use a fixed offset for mapping this buffer */
258 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
259 err = validate_fixed_buffer(vm, &bfr,
260 offset_align, mapping_size,
261 &va_node);
262 if (err)
263 goto clean_up;
264
265 map_offset = offset_align;
266 va_allocated = false;
267 } else
268 va_allocated = true;
269
270 err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
271 if (unlikely(err)) {
272 nvgpu_err(g, "failure setting up kind and compression");
273 goto clean_up;
274 }
275
276 /* bar1 and pmu vm don't need ctag */
277 if (!vm->enable_ctag)
278 bfr.ctag_lines = 0;
279
280 gk20a_get_comptags(g->dev, dmabuf, &comptags);
281
282 /* ensure alignment to compression page size if compression enabled */
283 if (bfr.ctag_offset)
284 mapping_size = ALIGN(mapping_size,
285 g->ops.fb.compression_page_size(g));
286
287 if (bfr.ctag_lines && !comptags.lines) {
288 const bool user_mappable =
289 !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
290
291 /* allocate compression resources if needed */
292 err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
293 bfr.ctag_lines, user_mappable,
294 &ctag_map_win_size,
295 &ctag_map_win_ctagline);
296 if (err) {
297 /* ok to fall back here if we ran out */
298 /* TBD: we can partially alloc ctags as well... */
299 bfr.kind_v = bfr.uc_kind_v;
300 } else {
301 gk20a_get_comptags(g->dev, dmabuf, &comptags);
302
303 if (g->ops.ltc.cbc_ctrl)
304 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
305 comptags.offset,
306 comptags.offset +
307 comptags.allocated_lines - 1);
308 else
309 clear_ctags = true;
310 }
311 }
312
313 /* store the comptag info */
314 bfr.ctag_offset = comptags.offset;
315 bfr.ctag_lines = comptags.lines;
316 bfr.ctag_allocated_lines = comptags.allocated_lines;
317 bfr.ctag_user_mappable = comptags.user_mappable;
318
319 /*
320 * Calculate comptag index for this mapping. Differs in
321 * case of partial mapping.
322 */
323 ctag_offset = comptags.offset;
324 if (ctag_offset)
325 ctag_offset += buffer_offset >>
326 ilog2(g->ops.fb.compression_page_size(g));
327
328 /* update gmmu ptes */
329 map_offset = g->ops.mm.gmmu_map(vm, map_offset,
330 bfr.sgt,
331 buffer_offset, /* sg offset */
332 mapping_size,
333 bfr.pgsz_idx,
334 bfr.kind_v,
335 ctag_offset,
336 flags, rw_flag,
337 clear_ctags,
338 false,
339 false,
340 batch,
341 aperture);
342 if (!map_offset)
343 goto clean_up;
344
345 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
346 if (!mapped_buffer) {
347 nvgpu_warn(g, "oom allocating tracking buffer");
348 goto clean_up;
349 }
350 mapped_buffer->dmabuf = dmabuf;
351 mapped_buffer->sgt = bfr.sgt;
352 mapped_buffer->addr = map_offset;
353 mapped_buffer->size = mapping_size;
354 mapped_buffer->pgsz_idx = bfr.pgsz_idx;
355 mapped_buffer->ctag_offset = bfr.ctag_offset;
356 mapped_buffer->ctag_lines = bfr.ctag_lines;
357 mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
358 mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
359 mapped_buffer->ctag_map_win_size = ctag_map_win_size;
360 mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
361 mapped_buffer->vm = vm;
362 mapped_buffer->flags = flags;
363 mapped_buffer->kind = kind;
364 mapped_buffer->va_allocated = va_allocated;
365 mapped_buffer->user_mapped = user_mapped ? 1 : 0;
366 mapped_buffer->own_mem_ref = user_mapped;
367 nvgpu_init_list_node(&mapped_buffer->buffer_list);
368 kref_init(&mapped_buffer->ref);
369
370 err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
371 if (err) {
372 nvgpu_err(g, "failed to insert into mapped buffer tree");
373 goto clean_up;
374 }
375 inserted = true;
376 if (user_mapped)
377 vm->num_user_mapped_buffers++;
378
379 if (va_node) {
380 nvgpu_list_add_tail(&mapped_buffer->buffer_list,
381 &va_node->buffer_list_head);
382 mapped_buffer->va_node = va_node;
383 }
384
385 nvgpu_mutex_release(&vm->update_gmmu_lock);
386
387 return map_offset;
388
389clean_up:
390 if (inserted) {
391 nvgpu_remove_mapped_buf(vm, mapped_buffer);
392 if (user_mapped)
393 vm->num_user_mapped_buffers--;
394 }
395 nvgpu_kfree(g, mapped_buffer);
396 if (va_allocated)
397 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
398 if (!IS_ERR(bfr.sgt))
399 gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
400
401 nvgpu_mutex_release(&vm->update_gmmu_lock);
402 nvgpu_log_info(g, "err=%d\n", err);
403 return 0;
404}
405
406void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
407{
408 struct gk20a *g = vm->mm->g;
409 struct nvgpu_mapped_buf *mapped_buffer;
410
411 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
412 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
413 if (!mapped_buffer) {
414 nvgpu_mutex_release(&vm->update_gmmu_lock);
415 nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
416 return;
417 }
418
419 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
420 nvgpu_mutex_release(&vm->update_gmmu_lock);
421}
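
nvgpu_get_buffer_alignment() above works chunk by chunk: each chunk only guarantees the largest power of two that divides both its base and its length (1ULL << __ffs(base | length)), and the buffer as a whole gets the minimum over all chunks. A minimal standalone sketch of that rule, with hypothetical helper names:

/* Largest power of two dividing both base and length of one chunk;
 * equivalent to 1ULL << __ffs(base | length) in the code above. */
static u64 example_chunk_alignment(u64 base, u64 length)
{
	u64 v = base | length;

	return v ? (v & ~(v - 1)) : 0;
}

/* Buffer alignment is the minimum over all chunks. For example, a
 * 2 MiB-aligned chunk of 384 KiB yields 128 KiB, but adding a chunk
 * that is only 4 KiB aligned drops the whole buffer to 4 KiB and
 * forces small GMMU pages. */
static u64 example_buffer_alignment(const u64 *bases, const u64 *lengths, int n)
{
	u64 align = 0;
	int i;

	for (i = 0; i < n; i++) {
		u64 a = example_chunk_alignment(bases[i], lengths[i]);

		align = align ? min(align, a) : a;
	}

	return align;
}
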
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h
index c0fb0ffe..9e064d76 100644
--- a/drivers/gpu/nvgpu/common/linux/vm_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -25,12 +25,24 @@ struct dma_buf;
25struct vm_gk20a; 25struct vm_gk20a;
26struct vm_gk20a_mapping_batch; 26struct vm_gk20a_mapping_batch;
27 27
28struct buffer_attrs {
29 struct sg_table *sgt;
30 u64 size;
31 u64 align;
32 u32 ctag_offset;
33 u32 ctag_lines;
34 u32 ctag_allocated_lines;
35 int pgsz_idx;
36 u8 kind_v;
37 u8 uc_kind_v;
38 bool ctag_user_mappable;
39};
40
28u64 nvgpu_vm_map(struct vm_gk20a *vm, 41u64 nvgpu_vm_map(struct vm_gk20a *vm,
29 struct dma_buf *dmabuf, 42 struct dma_buf *dmabuf,
30 u64 offset_align, 43 u64 offset_align,
31 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, 44 u32 flags,
32 int kind, 45 int kind,
33 struct sg_table **sgt,
34 bool user_mapped, 46 bool user_mapped,
35 int rw_flag, 47 int rw_flag,
36 u64 buffer_offset, 48 u64 buffer_offset,
@@ -59,4 +71,24 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
59int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, 71int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
60 struct dma_buf **dmabuf, 72 struct dma_buf **dmabuf,
61 u64 *offset); 73 u64 *offset);
74
75enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
76 struct dma_buf *dmabuf);
77int validate_fixed_buffer(struct vm_gk20a *vm,
78 struct buffer_attrs *bfr,
79 u64 map_offset, u64 map_size,
80 struct vm_reserved_va_node **pva_node);
81int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
82 u32 flags,
83 struct buffer_attrs *bfr,
84 enum gmmu_pgsz_gk20a pgsz_idx);
85int gk20a_alloc_comptags(struct gk20a *g,
86 struct device *dev,
87 struct dma_buf *dmabuf,
88 struct gk20a_comptag_allocator *allocator,
89 u32 lines, bool user_mappable,
90 u64 *ctag_map_win_size,
91 u32 *ctag_map_win_ctagline);
92void gk20a_vm_unmap_locked_kref(struct kref *ref);
93
62#endif 94#endif
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index eaf30fd0..635ac0fb 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -23,6 +23,11 @@
23#include "gk20a/gk20a.h" 23#include "gk20a/gk20a.h"
24#include "gk20a/mm_gk20a.h" 24#include "gk20a/mm_gk20a.h"
25 25
26int vm_aspace_id(struct vm_gk20a *vm)
27{
28 return vm->as_share ? vm->as_share->id : -1;
29}
30
26void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) 31void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
27{ 32{
28 memset(mapping_batch, 0, sizeof(*mapping_batch)); 33 memset(mapping_batch, 0, sizeof(*mapping_batch));
@@ -52,7 +57,7 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
52 57
53void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm) 58void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
54{ 59{
55 struct mapped_buffer_node *mapped_buffer; 60 struct nvgpu_mapped_buf *mapped_buffer;
56 struct vm_reserved_va_node *va_node, *va_node_tmp; 61 struct vm_reserved_va_node *va_node, *va_node_tmp;
57 struct nvgpu_rbtree_node *node = NULL; 62 struct nvgpu_rbtree_node *node = NULL;
58 struct gk20a *g = vm->mm->g; 63 struct gk20a *g = vm->mm->g;
@@ -118,7 +123,7 @@ void nvgpu_vm_put(struct vm_gk20a *vm)
118 kref_put(&vm->ref, nvgpu_vm_remove_support_kref); 123 kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
119} 124}
120 125
121void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) 126void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
122{ 127{
123 struct gk20a *g = vm->mm->g; 128 struct gk20a *g = vm->mm->g;
124 129
@@ -127,3 +132,59 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
127 gk20a_free_inst_block(g, inst_block); 132 gk20a_free_inst_block(g, inst_block);
128 nvgpu_vm_remove_support_nofree(vm); 133 nvgpu_vm_remove_support_nofree(vm);
129} 134}
135
136int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
137 struct nvgpu_mapped_buf *mapped_buffer)
138{
139 mapped_buffer->node.key_start = mapped_buffer->addr;
140 mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
141
142 nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
143
144 return 0;
145}
146
147void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
148 struct nvgpu_mapped_buf *mapped_buffer)
149{
150 nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
151}
152
153struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
154 struct vm_gk20a *vm, u64 addr)
155{
156 struct nvgpu_rbtree_node *node = NULL;
157 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
158
159 nvgpu_rbtree_search(addr, &node, root);
160 if (!node)
161 return NULL;
162
163 return mapped_buffer_from_rbtree_node(node);
164}
165
166struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
167 struct vm_gk20a *vm, u64 addr)
168{
169 struct nvgpu_rbtree_node *node = NULL;
170 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
171
172 nvgpu_rbtree_range_search(addr, &node, root);
173 if (!node)
174 return NULL;
175
176 return mapped_buffer_from_rbtree_node(node);
177}
178
179struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
180 struct vm_gk20a *vm, u64 addr)
181{
182 struct nvgpu_rbtree_node *node = NULL;
183 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
184
185 nvgpu_rbtree_less_than_search(addr, &node, root);
186 if (!node)
187 return NULL;
188
189 return mapped_buffer_from_rbtree_node(node);
190}
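
The helpers added above key each nvgpu_mapped_buf on the range [addr, addr + size). One intended use, mirrored from validate_fixed_buffer() in the mm_gk20a.c diff below, is the overlap check for fixed-offset mappings. A hedged sketch with a hypothetical helper name; the caller is assumed to hold vm->update_gmmu_lock:

/* Sketch only: returns true if [start, start + size) does not overlap
 * any existing mapping in this VM. Mirrors validate_fixed_buffer(). */
static bool example_range_is_free(struct vm_gk20a *vm, u64 start, u64 size)
{
	struct nvgpu_mapped_buf *buffer;

	/* The mapped buffer that starts just below the end of our range. */
	buffer = __nvgpu_vm_find_mapped_buf_less_than(vm, start + size);

	/* It conflicts only if it extends past our start address. */
	return !(buffer && buffer->addr + buffer->size > start);
}
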
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index cf95019b..d2bb3ee9 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1025,7 +1025,7 @@ __releases(&cde_app->mutex)
1025 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ 1025 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
1026 map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, 1026 map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
1027 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 1027 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1028 compbits_kind, NULL, true, 1028 compbits_kind, true,
1029 gk20a_mem_flag_none, 1029 gk20a_mem_flag_none,
1030 map_offset, map_size, 1030 map_offset, map_size,
1031 NULL); 1031 NULL);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index b7fb363e..9e3bc05e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1406,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
1406 int err; 1406 int err;
1407 1407
1408 words = pbdma_gp_entry1_length_v(g->entry1); 1408 words = pbdma_gp_entry1_length_v(g->entry1);
1409 err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); 1409 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
1410 if (!err) 1410 if (!err)
1411 mem = dma_buf_vmap(dmabuf); 1411 mem = dma_buf_vmap(dmabuf);
1412 } 1412 }
@@ -1903,7 +1903,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1903 bool skip_buffer_refcounting) 1903 bool skip_buffer_refcounting)
1904{ 1904{
1905 struct vm_gk20a *vm = c->vm; 1905 struct vm_gk20a *vm = c->vm;
1906 struct mapped_buffer_node **mapped_buffers = NULL; 1906 struct nvgpu_mapped_buf **mapped_buffers = NULL;
1907 int err = 0, num_mapped_buffers = 0; 1907 int err = 0, num_mapped_buffers = 0;
1908 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); 1908 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
1909 1909
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 1cbf7689..bbc1a72a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -60,7 +60,7 @@ struct channel_ctx_gk20a {
60}; 60};
61 61
62struct channel_gk20a_job { 62struct channel_gk20a_job {
63 struct mapped_buffer_node **mapped_buffers; 63 struct nvgpu_mapped_buf **mapped_buffers;
64 int num_mapped_buffers; 64 int num_mapped_buffers;
65 struct gk20a_fence *pre_fence; 65 struct gk20a_fence *pre_fence;
66 struct gk20a_fence *post_fence; 66 struct gk20a_fence *post_fence;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 5351750a..54317195 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
1924err_unmap: 1924err_unmap:
1925 nvgpu_vm_unmap_buffer(vm, args->offset, NULL); 1925 nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
1926err_remove_vm: 1926err_remove_vm:
1927 gk20a_remove_vm(vm, &mm->perfbuf.inst_block); 1927 nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
1928 nvgpu_mutex_release(&g->dbg_sessions_lock); 1928 nvgpu_mutex_release(&g->dbg_sessions_lock);
1929 return err; 1929 return err;
1930} 1930}
@@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
1962 err = gk20a_perfbuf_disable_locked(g); 1962 err = gk20a_perfbuf_disable_locked(g);
1963 1963
1964 nvgpu_vm_unmap_buffer(vm, offset, NULL); 1964 nvgpu_vm_unmap_buffer(vm, offset, NULL);
1965 gk20a_remove_vm(vm, &mm->perfbuf.inst_block); 1965 nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
1966 1966
1967 g->perfbuf.owner = NULL; 1967 g->perfbuf.owner = NULL;
1968 g->perfbuf.offset = 0; 1968 g->perfbuf.offset = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 72a3ee13..84919d50 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -25,6 +25,7 @@
25#include <uapi/linux/nvgpu.h> 25#include <uapi/linux/nvgpu.h>
26#include <trace/events/gk20a.h> 26#include <trace/events/gk20a.h>
27 27
28#include <nvgpu/vm.h>
28#include <nvgpu/dma.h> 29#include <nvgpu/dma.h>
29#include <nvgpu/kmem.h> 30#include <nvgpu/kmem.h>
30#include <nvgpu/timers.h> 31#include <nvgpu/timers.h>
@@ -121,25 +122,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
121 * 122 *
122 */ 123 */
123 124
124static inline int vm_aspace_id(struct vm_gk20a *vm)
125{
126 /* -1 is bar1 or pmu, etc. */
127 return vm->as_share ? vm->as_share->id : -1;
128}
129static inline u32 hi32(u64 f)
130{
131 return (u32)(f >> 32);
132}
133static inline u32 lo32(u64 f)
134{
135 return (u32)(f & 0xffffffff);
136}
137
138static struct mapped_buffer_node *find_mapped_buffer_locked(
139 struct nvgpu_rbtree_node *root, u64 addr);
140static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
141 struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
142 u32 kind);
143static int update_gmmu_ptes_locked(struct vm_gk20a *vm, 125static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
144 enum gmmu_pgsz_gk20a pgsz_idx, 126 enum gmmu_pgsz_gk20a pgsz_idx,
145 struct sg_table *sgt, u64 buffer_offset, 127 struct sg_table *sgt, u64 buffer_offset,
@@ -316,13 +298,13 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
316 *comptags = priv->comptags; 298 *comptags = priv->comptags;
317} 299}
318 300
319static int gk20a_alloc_comptags(struct gk20a *g, 301int gk20a_alloc_comptags(struct gk20a *g,
320 struct device *dev, 302 struct device *dev,
321 struct dma_buf *dmabuf, 303 struct dma_buf *dmabuf,
322 struct gk20a_comptag_allocator *allocator, 304 struct gk20a_comptag_allocator *allocator,
323 u32 lines, bool user_mappable, 305 u32 lines, bool user_mappable,
324 u64 *ctag_map_win_size, 306 u64 *ctag_map_win_size,
325 u32 *ctag_map_win_ctagline) 307 u32 *ctag_map_win_ctagline)
326{ 308{
327 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); 309 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
328 u32 ctaglines_allocsize; 310 u32 ctaglines_allocsize;
@@ -493,9 +475,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
493 g->ops.mm.remove_bar2_vm(g); 475 g->ops.mm.remove_bar2_vm(g);
494 476
495 if (g->ops.mm.is_bar1_supported(g)) 477 if (g->ops.mm.is_bar1_supported(g))
496 gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); 478 nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
497 479
498 gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); 480 nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
499 gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); 481 gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
500 nvgpu_vm_remove_support_nofree(&mm->cde.vm); 482 nvgpu_vm_remove_support_nofree(&mm->cde.vm);
501 483
@@ -1097,11 +1079,11 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
1097} 1079}
1098 1080
1099int nvgpu_vm_get_buffers(struct vm_gk20a *vm, 1081int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
1100 struct mapped_buffer_node ***mapped_buffers, 1082 struct nvgpu_mapped_buf ***mapped_buffers,
1101 int *num_buffers) 1083 int *num_buffers)
1102{ 1084{
1103 struct mapped_buffer_node *mapped_buffer; 1085 struct nvgpu_mapped_buf *mapped_buffer;
1104 struct mapped_buffer_node **buffer_list; 1086 struct nvgpu_mapped_buf **buffer_list;
1105 struct nvgpu_rbtree_node *node = NULL; 1087 struct nvgpu_rbtree_node *node = NULL;
1106 int i = 0; 1088 int i = 0;
1107 1089
@@ -1141,15 +1123,15 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
1141 return 0; 1123 return 0;
1142} 1124}
1143 1125
1144static void gk20a_vm_unmap_locked_kref(struct kref *ref) 1126void gk20a_vm_unmap_locked_kref(struct kref *ref)
1145{ 1127{
1146 struct mapped_buffer_node *mapped_buffer = 1128 struct nvgpu_mapped_buf *mapped_buffer =
1147 container_of(ref, struct mapped_buffer_node, ref); 1129 container_of(ref, struct nvgpu_mapped_buf, ref);
1148 nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); 1130 nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
1149} 1131}
1150 1132
1151void nvgpu_vm_put_buffers(struct vm_gk20a *vm, 1133void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
1152 struct mapped_buffer_node **mapped_buffers, 1134 struct nvgpu_mapped_buf **mapped_buffers,
1153 int num_buffers) 1135 int num_buffers)
1154{ 1136{
1155 int i; 1137 int i;
@@ -1177,11 +1159,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
1177 struct vm_gk20a_mapping_batch *batch) 1159 struct vm_gk20a_mapping_batch *batch)
1178{ 1160{
1179 struct gk20a *g = vm->mm->g; 1161 struct gk20a *g = vm->mm->g;
1180 struct mapped_buffer_node *mapped_buffer; 1162 struct nvgpu_mapped_buf *mapped_buffer;
1181 1163
1182 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1164 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1183 1165
1184 mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset); 1166 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
1185 if (!mapped_buffer) { 1167 if (!mapped_buffer) {
1186 nvgpu_mutex_release(&vm->update_gmmu_lock); 1168 nvgpu_mutex_release(&vm->update_gmmu_lock);
1187 nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); 1169 nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
@@ -1273,100 +1255,10 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
1273 return 0; 1255 return 0;
1274} 1256}
1275 1257
1276 1258int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
1277static void remove_mapped_buffer(struct vm_gk20a *vm, 1259 u32 flags,
1278 struct mapped_buffer_node *mapped_buffer) 1260 struct buffer_attrs *bfr,
1279{ 1261 enum gmmu_pgsz_gk20a pgsz_idx)
1280 nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
1281}
1282
1283static int insert_mapped_buffer(struct vm_gk20a *vm,
1284 struct mapped_buffer_node *mapped_buffer)
1285{
1286 mapped_buffer->node.key_start = mapped_buffer->addr;
1287 mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
1288
1289 nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
1290
1291 return 0;
1292}
1293
1294static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
1295 struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
1296 u32 kind)
1297{
1298 struct nvgpu_rbtree_node *node = NULL;
1299
1300 nvgpu_rbtree_enum_start(0, &node, root);
1301
1302 while (node) {
1303 struct mapped_buffer_node *mapped_buffer =
1304 mapped_buffer_from_rbtree_node(node);
1305
1306 if (mapped_buffer->dmabuf == dmabuf &&
1307 kind == mapped_buffer->kind)
1308 return mapped_buffer;
1309
1310 nvgpu_rbtree_enum_next(&node, node);
1311 }
1312
1313 return NULL;
1314}
1315
1316static struct mapped_buffer_node *find_mapped_buffer_locked(
1317 struct nvgpu_rbtree_node *root, u64 addr)
1318{
1319 struct nvgpu_rbtree_node *node = NULL;
1320
1321 nvgpu_rbtree_search(addr, &node, root);
1322 if (!node)
1323 return NULL;
1324
1325 return mapped_buffer_from_rbtree_node(node);
1326}
1327
1328static struct mapped_buffer_node *find_mapped_buffer_range_locked(
1329 struct nvgpu_rbtree_node *root, u64 addr)
1330{
1331 struct nvgpu_rbtree_node *node = NULL;
1332
1333 nvgpu_rbtree_range_search(addr, &node, root);
1334 if (!node)
1335 return NULL;
1336
1337 return mapped_buffer_from_rbtree_node(node);
1338}
1339
1340/* find the first mapped buffer with GPU VA less than addr */
1341static struct mapped_buffer_node *find_mapped_buffer_less_than_locked(
1342 struct nvgpu_rbtree_node *root, u64 addr)
1343{
1344 struct nvgpu_rbtree_node *node = NULL;
1345
1346 nvgpu_rbtree_less_than_search(addr, &node, root);
1347 if (!node)
1348 return NULL;
1349
1350 return mapped_buffer_from_rbtree_node(node);
1351}
1352
1353struct buffer_attrs {
1354 struct sg_table *sgt;
1355 u64 size;
1356 u64 align;
1357 u32 ctag_offset;
1358 u32 ctag_lines;
1359 u32 ctag_allocated_lines;
1360 int pgsz_idx;
1361 u8 kind_v;
1362 u8 uc_kind_v;
1363 bool ctag_user_mappable;
1364};
1365
1366static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
1367 u32 flags,
1368 struct buffer_attrs *bfr,
1369 enum gmmu_pgsz_gk20a pgsz_idx)
1370{ 1262{
1371 bool kind_compressible; 1263 bool kind_compressible;
1372 struct gk20a *g = gk20a_from_vm(vm); 1264 struct gk20a *g = gk20a_from_vm(vm);
@@ -1409,14 +1301,14 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
1409 return 0; 1301 return 0;
1410} 1302}
1411 1303
1412static int validate_fixed_buffer(struct vm_gk20a *vm, 1304int validate_fixed_buffer(struct vm_gk20a *vm,
1413 struct buffer_attrs *bfr, 1305 struct buffer_attrs *bfr,
1414 u64 map_offset, u64 map_size, 1306 u64 map_offset, u64 map_size,
1415 struct vm_reserved_va_node **pva_node) 1307 struct vm_reserved_va_node **pva_node)
1416{ 1308{
1417 struct gk20a *g = vm->mm->g; 1309 struct gk20a *g = vm->mm->g;
1418 struct vm_reserved_va_node *va_node; 1310 struct vm_reserved_va_node *va_node;
1419 struct mapped_buffer_node *buffer; 1311 struct nvgpu_mapped_buf *buffer;
1420 u64 map_end = map_offset + map_size; 1312 u64 map_end = map_offset + map_size;
1421 1313
1422 /* can wrap around with insane map_size; zero is disallowed too */ 1314 /* can wrap around with insane map_size; zero is disallowed too */
@@ -1448,8 +1340,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
1448 /* check that this mapping does not collide with existing 1340 /* check that this mapping does not collide with existing
1449 * mappings by checking the buffer with the highest GPU VA 1341 * mappings by checking the buffer with the highest GPU VA
1450 * that is less than our buffer end */ 1342 * that is less than our buffer end */
1451 buffer = find_mapped_buffer_less_than_locked( 1343 buffer = __nvgpu_vm_find_mapped_buf_less_than(
1452 vm->mapped_buffers, map_offset + map_size); 1344 vm, map_offset + map_size);
1453 if (buffer && buffer->addr + buffer->size > map_offset) { 1345 if (buffer && buffer->addr + buffer->size > map_offset) {
1454 nvgpu_warn(g, "overlapping buffer map requested"); 1346 nvgpu_warn(g, "overlapping buffer map requested");
1455 return -EINVAL; 1347 return -EINVAL;
@@ -1499,11 +1391,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
1499 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " 1391 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
1500 "pgsz=%-3dKb as=%-2d ctags=%d start=%d " 1392 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
1501 "kind=0x%x flags=0x%x apt=%s", 1393 "kind=0x%x flags=0x%x apt=%s",
1502 hi32(map_offset), lo32(map_offset), size, 1394 u64_hi32(map_offset), u64_lo32(map_offset), size,
1503 sgt ? hi32((u64)sg_dma_address(sgt->sgl)) : 0, 1395 sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
1504 sgt ? lo32((u64)sg_dma_address(sgt->sgl)) : 0, 1396 sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
1505 sgt ? hi32((u64)sg_phys(sgt->sgl)) : 0, 1397 sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
1506 sgt ? lo32((u64)sg_phys(sgt->sgl)) : 0, 1398 sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
1507 vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm), 1399 vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
1508 ctag_lines, ctag_offset, 1400 ctag_lines, ctag_offset,
1509 kind_v, flags, nvgpu_aperture_str(aperture)); 1401 kind_v, flags, nvgpu_aperture_str(aperture));
@@ -1595,8 +1487,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
1595 } 1487 }
1596} 1488}
1597 1489
1598static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, 1490enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
1599 struct dma_buf *dmabuf) 1491 struct dma_buf *dmabuf)
1600{ 1492{
1601 struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf); 1493 struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf);
1602 if (buf_owner == NULL) { 1494 if (buf_owner == NULL) {
@@ -1617,80 +1509,6 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
1617 } 1509 }
1618} 1510}
1619 1511
1620static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm,
1621 struct dma_buf *dmabuf,
1622 u64 offset_align,
1623 u32 flags,
1624 int kind,
1625 struct sg_table **sgt,
1626 bool user_mapped,
1627 int rw_flag)
1628{
1629 struct gk20a *g = gk20a_from_vm(vm);
1630 struct mapped_buffer_node *mapped_buffer = NULL;
1631
1632 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1633 mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers,
1634 offset_align);
1635 if (!mapped_buffer)
1636 return 0;
1637
1638 if (mapped_buffer->dmabuf != dmabuf ||
1639 mapped_buffer->kind != (u32)kind)
1640 return 0;
1641 } else {
1642 mapped_buffer =
1643 find_mapped_buffer_reverse_locked(vm->mapped_buffers,
1644 dmabuf, kind);
1645 if (!mapped_buffer)
1646 return 0;
1647 }
1648
1649 if (mapped_buffer->flags != flags)
1650 return 0;
1651
1652 BUG_ON(mapped_buffer->vm != vm);
1653
1654 /* mark the buffer as used */
1655 if (user_mapped) {
1656 if (mapped_buffer->user_mapped == 0)
1657 vm->num_user_mapped_buffers++;
1658 mapped_buffer->user_mapped++;
1659
1660 /* If the mapping comes from user space, we own
1661 * the handle ref. Since we reuse an
1662 * existing mapping here, we need to give back those
1663 * refs once in order not to leak.
1664 */
1665 if (mapped_buffer->own_mem_ref)
1666 dma_buf_put(mapped_buffer->dmabuf);
1667 else
1668 mapped_buffer->own_mem_ref = true;
1669 }
1670 kref_get(&mapped_buffer->ref);
1671
1672 gk20a_dbg(gpu_dbg_map,
1673 "gv: 0x%04x_%08x + 0x%-7zu "
1674 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
1675 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
1676 "flags=0x%x apt=%s (reused)",
1677 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1678 dmabuf->size,
1679 hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1680 lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1681 hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1682 lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1683 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
1684 vm_aspace_id(vm),
1685 mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
1686 mapped_buffer->flags,
1687 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
1688
1689 if (sgt)
1690 *sgt = mapped_buffer->sgt;
1691 return mapped_buffer->addr;
1692}
1693
1694#if defined(CONFIG_GK20A_VIDMEM) 1512#if defined(CONFIG_GK20A_VIDMEM)
1695static struct sg_table *gk20a_vidbuf_map_dma_buf( 1513static struct sg_table *gk20a_vidbuf_map_dma_buf(
1696 struct dma_buf_attachment *attach, enum dma_data_direction dir) 1514 struct dma_buf_attachment *attach, enum dma_data_direction dir)
@@ -1919,310 +1737,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
1919#endif 1737#endif
1920} 1738}
1921 1739
1922static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
1923 enum nvgpu_aperture aperture)
1924{
1925 u64 align = 0, chunk_align = 0;
1926 u64 buf_addr;
1927
1928 if (aperture == APERTURE_VIDMEM) {
1929 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
1930 struct page_alloc_chunk *chunk = NULL;
1931
1932 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
1933 page_alloc_chunk, list_entry) {
1934 chunk_align = 1ULL << __ffs(chunk->base | chunk->length);
1935
1936 if (align)
1937 align = min(align, chunk_align);
1938 else
1939 align = chunk_align;
1940 }
1941
1942 return align;
1943 }
1944
1945 buf_addr = (u64)sg_dma_address(sgl);
1946
1947 if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
1948 while (sgl) {
1949 buf_addr = (u64)sg_phys(sgl);
1950 chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length);
1951
1952 if (align)
1953 align = min(align, chunk_align);
1954 else
1955 align = chunk_align;
1956 sgl = sg_next(sgl);
1957 }
1958
1959 return align;
1960 }
1961
1962 align = 1ULL << __ffs(buf_addr);
1963
1964 return align;
1965}
1966
1967u64 nvgpu_vm_map(struct vm_gk20a *vm,
1968 struct dma_buf *dmabuf,
1969 u64 offset_align,
1970 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
1971 int kind,
1972 struct sg_table **sgt,
1973 bool user_mapped,
1974 int rw_flag,
1975 u64 buffer_offset,
1976 u64 mapping_size,
1977 struct vm_gk20a_mapping_batch *batch)
1978{
1979 struct gk20a *g = gk20a_from_vm(vm);
1980 struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
1981 struct device *d = dev_from_vm(vm);
1982 struct mapped_buffer_node *mapped_buffer = NULL;
1983 bool inserted = false, va_allocated = false;
1984 u64 map_offset = 0;
1985 int err = 0;
1986 struct buffer_attrs bfr = {NULL};
1987 struct gk20a_comptags comptags;
1988 bool clear_ctags = false;
1989 struct scatterlist *sgl;
1990 u64 ctag_map_win_size = 0;
1991 u32 ctag_map_win_ctagline = 0;
1992 struct vm_reserved_va_node *va_node = NULL;
1993 u32 ctag_offset;
1994 enum nvgpu_aperture aperture;
1995
1996 if (user_mapped && vm->userspace_managed &&
1997 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
1998 nvgpu_err(g,
1999 "%s: non-fixed-offset mapping not available on userspace managed address spaces",
2000 __func__);
2001 return -EFAULT;
2002 }
2003
2004 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2005
2006 /* check if this buffer is already mapped */
2007 if (!vm->userspace_managed) {
2008 map_offset = nvgpu_vm_map_duplicate_locked(
2009 vm, dmabuf, offset_align,
2010 flags, kind, sgt,
2011 user_mapped, rw_flag);
2012 if (map_offset) {
2013 nvgpu_mutex_release(&vm->update_gmmu_lock);
2014 return map_offset;
2015 }
2016 }
2017
2018 /* pin buffer to get phys/iovmm addr */
2019 bfr.sgt = gk20a_mm_pin(d, dmabuf);
2020 if (IS_ERR(bfr.sgt)) {
2021 /* Falling back to physical is actually possible
2022 * here in many cases if we use 4K phys pages in the
2023 * gmmu. However we have some regions which require
2024 * contig regions to work properly (either phys-contig
2025 * or contig through smmu io_vaspace). Until we can
2026 * track the difference between those two cases we have
2027 * to fail the mapping when we run out of SMMU space.
2028 */
2029 nvgpu_warn(g, "oom allocating tracking buffer");
2030 goto clean_up;
2031 }
2032
2033 if (sgt)
2034 *sgt = bfr.sgt;
2035
2036 bfr.kind_v = kind;
2037 bfr.size = dmabuf->size;
2038 sgl = bfr.sgt->sgl;
2039
2040 aperture = gk20a_dmabuf_aperture(g, dmabuf);
2041 if (aperture == APERTURE_INVALID) {
2042 err = -EINVAL;
2043 goto clean_up;
2044 }
2045
2046 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
2047 map_offset = offset_align;
2048
2049 bfr.align = gk20a_mm_get_align(g, sgl, aperture);
2050 bfr.pgsz_idx = __get_pte_size(vm, map_offset,
2051 min_t(u64, bfr.size, bfr.align));
2052 mapping_size = mapping_size ? mapping_size : bfr.size;
2053
2054 /* Check if we should use a fixed offset for mapping this buffer */
2055 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
2056 err = validate_fixed_buffer(vm, &bfr,
2057 offset_align, mapping_size,
2058 &va_node);
2059 if (err)
2060 goto clean_up;
2061
2062 map_offset = offset_align;
2063 va_allocated = false;
2064 } else
2065 va_allocated = true;
2066
2067 if (sgt)
2068 *sgt = bfr.sgt;
2069
2070 err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
2071 if (unlikely(err)) {
2072 nvgpu_err(g, "failure setting up kind and compression");
2073 goto clean_up;
2074 }
2075
2076 /* bar1 and pmu vm don't need ctag */
2077 if (!vm->enable_ctag)
2078 bfr.ctag_lines = 0;
2079
2080 gk20a_get_comptags(d, dmabuf, &comptags);
2081
2082 /* ensure alignment to compression page size if compression enabled */
2083 if (bfr.ctag_offset)
2084 mapping_size = ALIGN(mapping_size,
2085 g->ops.fb.compression_page_size(g));
2086
2087 if (bfr.ctag_lines && !comptags.lines) {
2088 const bool user_mappable =
2089 !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
2090
2091 /* allocate compression resources if needed */
2092 err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
2093 bfr.ctag_lines, user_mappable,
2094 &ctag_map_win_size,
2095 &ctag_map_win_ctagline);
2096 if (err) {
2097 /* ok to fall back here if we ran out */
2098 /* TBD: we can partially alloc ctags as well... */
2099 bfr.kind_v = bfr.uc_kind_v;
2100 } else {
2101 gk20a_get_comptags(d, dmabuf, &comptags);
2102
2103 if (g->ops.ltc.cbc_ctrl)
2104 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
2105 comptags.offset,
2106 comptags.offset +
2107 comptags.allocated_lines - 1);
2108 else
2109 clear_ctags = true;
2110 }
2111 }
2112
2113 /* store the comptag info */
2114 bfr.ctag_offset = comptags.offset;
2115 bfr.ctag_lines = comptags.lines;
2116 bfr.ctag_allocated_lines = comptags.allocated_lines;
2117 bfr.ctag_user_mappable = comptags.user_mappable;
2118
2119 /*
2120 * Calculate comptag index for this mapping. Differs in
2121 * case of partial mapping.
2122 */
2123 ctag_offset = comptags.offset;
2124 if (ctag_offset)
2125 ctag_offset += buffer_offset >>
2126 ilog2(g->ops.fb.compression_page_size(g));
2127
2128 /* update gmmu ptes */
2129 map_offset = g->ops.mm.gmmu_map(vm, map_offset,
2130 bfr.sgt,
2131 buffer_offset, /* sg offset */
2132 mapping_size,
2133 bfr.pgsz_idx,
2134 bfr.kind_v,
2135 ctag_offset,
2136 flags, rw_flag,
2137 clear_ctags,
2138 false,
2139 false,
2140 batch,
2141 aperture);
2142 if (!map_offset)
2143 goto clean_up;
2144
2145#if defined(NVHOST_DEBUG)
2146 {
2147 int i;
2148 struct scatterlist *sg = NULL;
2149 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
2150 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
2151 u64 da = sg_dma_address(sg);
2152 u64 pa = sg_phys(sg);
2153 u64 len = sg->length;
2154 gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
2155 i, hi32(pa), lo32(pa), hi32(da), lo32(da),
2156 hi32(len), lo32(len));
2157 }
2158 }
2159#endif
2160
2161 /* keep track of the buffer for unmapping */
2162 /* TBD: check for multiple mapping of same buffer */
2163 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
2164 if (!mapped_buffer) {
2165 nvgpu_warn(g, "oom allocating tracking buffer");
2166 goto clean_up;
2167 }
2168 mapped_buffer->dmabuf = dmabuf;
2169 mapped_buffer->sgt = bfr.sgt;
2170 mapped_buffer->addr = map_offset;
2171 mapped_buffer->size = mapping_size;
2172 mapped_buffer->pgsz_idx = bfr.pgsz_idx;
2173 mapped_buffer->ctag_offset = bfr.ctag_offset;
2174 mapped_buffer->ctag_lines = bfr.ctag_lines;
2175 mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
2176 mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
2177 mapped_buffer->ctag_map_win_size = ctag_map_win_size;
2178 mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
2179 mapped_buffer->vm = vm;
2180 mapped_buffer->flags = flags;
2181 mapped_buffer->kind = kind;
2182 mapped_buffer->va_allocated = va_allocated;
2183 mapped_buffer->user_mapped = user_mapped ? 1 : 0;
2184 mapped_buffer->own_mem_ref = user_mapped;
2185 nvgpu_init_list_node(&mapped_buffer->va_buffers_list);
2186 kref_init(&mapped_buffer->ref);
2187
2188 err = insert_mapped_buffer(vm, mapped_buffer);
2189 if (err) {
2190 nvgpu_err(g, "failed to insert into mapped buffer tree");
2191 goto clean_up;
2192 }
2193 inserted = true;
2194 if (user_mapped)
2195 vm->num_user_mapped_buffers++;
2196
2197 gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
2198
2199 if (va_node) {
2200 nvgpu_list_add_tail(&mapped_buffer->va_buffers_list,
2201 &va_node->va_buffers_list);
2202 mapped_buffer->va_node = va_node;
2203 }
2204
2205 nvgpu_mutex_release(&vm->update_gmmu_lock);
2206
2207 return map_offset;
2208
2209clean_up:
2210 if (inserted) {
2211 remove_mapped_buffer(vm, mapped_buffer);
2212 if (user_mapped)
2213 vm->num_user_mapped_buffers--;
2214 }
2215 nvgpu_kfree(g, mapped_buffer);
2216 if (va_allocated)
2217 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
2218 if (!IS_ERR(bfr.sgt))
2219 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
2220
2221 nvgpu_mutex_release(&vm->update_gmmu_lock);
2222 gk20a_dbg_info("err=%d\n", err);
2223 return 0;
2224}
2225
2226int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, 1740int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
2227 u64 mapping_gva, 1741 u64 mapping_gva,
2228 u64 *compbits_win_size, 1742 u64 *compbits_win_size,
@@ -2230,12 +1744,12 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
2230 u32 *mapping_ctagline, 1744 u32 *mapping_ctagline,
2231 u32 *flags) 1745 u32 *flags)
2232{ 1746{
2233 struct mapped_buffer_node *mapped_buffer; 1747 struct nvgpu_mapped_buf *mapped_buffer;
2234 struct gk20a *g = vm->mm->g; 1748 struct gk20a *g = vm->mm->g;
2235 1749
2236 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1750 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2237 1751
2238 mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva); 1752 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
2239 1753
2240 if (!mapped_buffer || !mapped_buffer->user_mapped) 1754 if (!mapped_buffer || !mapped_buffer->user_mapped)
2241 { 1755 {
@@ -2271,7 +1785,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
2271 u64 *mapping_iova, 1785 u64 *mapping_iova,
2272 u32 flags) 1786 u32 flags)
2273{ 1787{
2274 struct mapped_buffer_node *mapped_buffer; 1788 struct nvgpu_mapped_buf *mapped_buffer;
2275 struct gk20a *g = gk20a_from_vm(vm); 1789 struct gk20a *g = gk20a_from_vm(vm);
2276 const bool fixed_mapping = 1790 const bool fixed_mapping =
2277 (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0; 1791 (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
@@ -2292,8 +1806,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
2292 1806
2293 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1807 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2294 1808
2295 mapped_buffer = 1809 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva);
2296 find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
2297 1810
2298 if (!mapped_buffer || !mapped_buffer->user_mapped) { 1811 if (!mapped_buffer || !mapped_buffer->user_mapped) {
2299 nvgpu_mutex_release(&vm->update_gmmu_lock); 1812 nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -2537,12 +2050,12 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
2537 2050
2538dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) 2051dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
2539{ 2052{
2540 struct mapped_buffer_node *buffer; 2053 struct nvgpu_mapped_buf *buffer;
2541 dma_addr_t addr = 0; 2054 dma_addr_t addr = 0;
2542 struct gk20a *g = gk20a_from_vm(vm); 2055 struct gk20a *g = gk20a_from_vm(vm);
2543 2056
2544 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 2057 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2545 buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr); 2058 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
2546 if (buffer) 2059 if (buffer)
2547 addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, 2060 addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
2548 buffer->flags); 2061 buffer->flags);
@@ -3026,7 +2539,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
3026} 2539}
3027 2540
3028/* NOTE! mapped_buffers lock must be held */ 2541/* NOTE! mapped_buffers lock must be held */
3029void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, 2542void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
3030 struct vm_gk20a_mapping_batch *batch) 2543 struct vm_gk20a_mapping_batch *batch)
3031{ 2544{
3032 struct vm_gk20a *vm = mapped_buffer->vm; 2545 struct vm_gk20a *vm = mapped_buffer->vm;
@@ -3057,7 +2570,7 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
3057 2570
3058 gk20a_dbg(gpu_dbg_map, 2571 gk20a_dbg(gpu_dbg_map,
3059 "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d", 2572 "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d",
3060 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), 2573 u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
3061 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, 2574 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
3062 vm_aspace_id(vm), 2575 vm_aspace_id(vm),
3063 mapped_buffer->own_mem_ref); 2576 mapped_buffer->own_mem_ref);
@@ -3066,9 +2579,9 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
3066 mapped_buffer->sgt); 2579 mapped_buffer->sgt);
3067 2580
3068 /* remove from mapped buffer tree and remove list, free */ 2581 /* remove from mapped buffer tree and remove list, free */
3069 remove_mapped_buffer(vm, mapped_buffer); 2582 nvgpu_remove_mapped_buf(vm, mapped_buffer);
3070 if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list)) 2583 if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
3071 nvgpu_list_del(&mapped_buffer->va_buffers_list); 2584 nvgpu_list_del(&mapped_buffer->buffer_list);
3072 2585
3073 /* keep track of mapped buffers */ 2586 /* keep track of mapped buffers */
3074 if (mapped_buffer->user_mapped) 2587 if (mapped_buffer->user_mapped)
@@ -3082,22 +2595,6 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
3082 return; 2595 return;
3083} 2596}
3084 2597
3085void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
3086{
3087 struct gk20a *g = vm->mm->g;
3088 struct mapped_buffer_node *mapped_buffer;
3089
3090 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
3091 mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
3092 if (!mapped_buffer) {
3093 nvgpu_mutex_release(&vm->update_gmmu_lock);
3094 nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
3095 return;
3096 }
3097
3098 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
3099 nvgpu_mutex_release(&vm->update_gmmu_lock);
3100}
3101 2598
3102static void gk20a_vm_free_entries(struct vm_gk20a *vm, 2599static void gk20a_vm_free_entries(struct vm_gk20a *vm,
3103 struct gk20a_mm_entry *parent, 2600 struct gk20a_mm_entry *parent,
@@ -3659,7 +3156,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
3659 va_node->vaddr_start = vaddr_start; 3156 va_node->vaddr_start = vaddr_start;
3660 va_node->size = (u64)args->page_size * (u64)args->pages; 3157 va_node->size = (u64)args->page_size * (u64)args->pages;
3661 va_node->pgsz_idx = pgsz_idx; 3158 va_node->pgsz_idx = pgsz_idx;
3662 nvgpu_init_list_node(&va_node->va_buffers_list); 3159 nvgpu_init_list_node(&va_node->buffer_list_head);
3663 nvgpu_init_list_node(&va_node->reserved_va_list); 3160 nvgpu_init_list_node(&va_node->reserved_va_list);
3664 3161
3665 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 3162 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
@@ -3723,15 +3220,15 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
3723 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 3220 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
3724 va_node = addr_to_reservation(vm, args->offset); 3221 va_node = addr_to_reservation(vm, args->offset);
3725 if (va_node) { 3222 if (va_node) {
3726 struct mapped_buffer_node *buffer, *n; 3223 struct nvgpu_mapped_buf *buffer, *n;
3727 3224
3728 /* Decrement the ref count on all buffers in this va_node. This 3225 /* Decrement the ref count on all buffers in this va_node. This
3729 * allows userspace to let the kernel free mappings that are 3226 * allows userspace to let the kernel free mappings that are
3730 * only used by this va_node. */ 3227 * only used by this va_node. */
3731 nvgpu_list_for_each_entry_safe(buffer, n, 3228 nvgpu_list_for_each_entry_safe(buffer, n,
3732 &va_node->va_buffers_list, 3229 &va_node->buffer_list_head,
3733 mapped_buffer_node, va_buffers_list) { 3230 nvgpu_mapped_buf, buffer_list) {
3734 nvgpu_list_del(&buffer->va_buffers_list); 3231 nvgpu_list_del(&buffer->buffer_list);
3735 kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); 3232 kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);
3736 } 3233 }
3737 3234
@@ -3887,7 +3384,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
3887 } 3384 }
3888 3385
3889 ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, 3386 ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
3890 flags, kind, NULL, true, 3387 flags, kind, true,
3891 gk20a_mem_flag_none, 3388 gk20a_mem_flag_none,
3892 buffer_offset, 3389 buffer_offset,
3893 mapping_size, 3390 mapping_size,
@@ -4296,18 +3793,17 @@ hw_was_off:
4296 gk20a_idle_nosuspend(g->dev); 3793 gk20a_idle_nosuspend(g->dev);
4297} 3794}
4298 3795
4299int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, 3796int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
4300 struct dma_buf **dmabuf, 3797 struct dma_buf **dmabuf,
4301 u64 *offset) 3798 u64 *offset)
4302{ 3799{
4303 struct mapped_buffer_node *mapped_buffer; 3800 struct nvgpu_mapped_buf *mapped_buffer;
4304 3801
4305 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); 3802 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
4306 3803
4307 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 3804 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
4308 3805
4309 mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers, 3806 mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
4310 gpu_va);
4311 if (!mapped_buffer) { 3807 if (!mapped_buffer) {
4312 nvgpu_mutex_release(&vm->update_gmmu_lock); 3808 nvgpu_mutex_release(&vm->update_gmmu_lock);
4313 return -EINVAL; 3809 return -EINVAL;
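For context, a hypothetical caller of the renamed lookup; the local names are illustrative only, but the signature matches the nvgpu_vm_find_buf() declaration added to include/nvgpu/vm.h later in this patch:

static int example_lookup(struct vm_gk20a *vm, u64 gpu_va)
{
        struct dma_buf *dmabuf;
        u64 offset;
        int err;

        err = nvgpu_vm_find_buf(vm, gpu_va, &dmabuf, &offset);
        if (err)
                return err;

        /* gpu_va lands 'offset' bytes into the buffer backed by dmabuf. */
        return 0;
}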
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 331843cc..357962c7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -140,52 +140,9 @@ struct priv_cmd_entry {
140 u32 size; /* in words */ 140 u32 size; /* in words */
141}; 141};
142 142
143struct mapped_buffer_node {
144 struct vm_gk20a *vm;
145 struct nvgpu_rbtree_node node;
146 struct nvgpu_list_node va_buffers_list;
147 struct vm_reserved_va_node *va_node;
148 u64 addr;
149 u64 size;
150 struct dma_buf *dmabuf;
151 struct sg_table *sgt;
152 struct kref ref;
153 u32 user_mapped;
154 bool own_mem_ref;
155 u32 pgsz_idx;
156 u32 ctag_offset;
157 u32 ctag_lines;
158 u32 ctag_allocated_lines;
159
160 /* For comptag mapping, these are the mapping window parameters */
161 bool ctags_mappable;
162 u64 ctag_map_win_addr; /* non-zero if mapped */
163 u64 ctag_map_win_size; /* non-zero if ctags_mappable */
164 u32 ctag_map_win_ctagline; /* ctagline at win start, set if
165 * ctags_mappable */
166
167 u32 flags;
168 u32 kind;
169 bool va_allocated;
170};
171
172static inline struct mapped_buffer_node *
173mapped_buffer_node_from_va_buffers_list(struct nvgpu_list_node *node)
174{
175 return (struct mapped_buffer_node *)
176 ((uintptr_t)node - offsetof(struct mapped_buffer_node, va_buffers_list));
177};
178
179static inline struct mapped_buffer_node *
180mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
181{
182 return (struct mapped_buffer_node *)
183 ((uintptr_t)node - offsetof(struct mapped_buffer_node, node));
184};
185
186struct vm_reserved_va_node { 143struct vm_reserved_va_node {
187 struct nvgpu_list_node reserved_va_list; 144 struct nvgpu_list_node reserved_va_list;
188 struct nvgpu_list_node va_buffers_list; 145 struct nvgpu_list_node buffer_list_head;
189 u32 pgsz_idx; 146 u32 pgsz_idx;
190 u64 vaddr_start; 147 u64 vaddr_start;
191 u64 size; 148 u64 size;
@@ -431,11 +388,6 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
431 return 0; 388 return 0;
432} 389}
433 390
434u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
435 u32 sysmem_mask, u32 vidmem_mask);
436u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
437 u32 sysmem_mask, u32 vidmem_mask);
438
439void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry, 391void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
440 size_t w, size_t data); 392 size_t w, size_t data);
441u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry); 393u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry);
@@ -532,8 +484,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
532void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, 484void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
533 struct vm_gk20a *vm); 485 struct vm_gk20a *vm);
534 486
535void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
536
537int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); 487int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
538 488
539extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; 489extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index eab51175..75dfcc86 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
401 struct mm_gk20a *mm = &g->mm; 401 struct mm_gk20a *mm = &g->mm;
402 402
403 gp10b_replayable_pagefault_buffer_deinit(g); 403 gp10b_replayable_pagefault_buffer_deinit(g);
404 gk20a_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); 404 nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block);
405} 405}
406 406
407 407
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 1fb772d5..e1ceffd4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -26,7 +26,10 @@
26#include <nvgpu/allocator.h> 26#include <nvgpu/allocator.h>
27 27
28struct vm_gk20a; 28struct vm_gk20a;
29struct mapped_buffer_node; 29struct vm_reserved_va_node;
30struct buffer_attrs;
31struct gk20a_comptag_allocator;
32
30 33
31/** 34/**
32 * This header contains the OS agnostic APIs for dealing with VMs. Most of the 35 * This header contains the OS agnostic APIs for dealing with VMs. Most of the
@@ -44,6 +47,50 @@ struct vm_gk20a_mapping_batch {
44 bool need_tlb_invalidate; 47 bool need_tlb_invalidate;
45}; 48};
46 49
50struct nvgpu_mapped_buf {
51 struct vm_gk20a *vm;
52 struct nvgpu_rbtree_node node;
53 struct nvgpu_list_node buffer_list;
54 struct vm_reserved_va_node *va_node;
55 u64 addr;
56 u64 size;
57 struct dma_buf *dmabuf;
58 struct sg_table *sgt;
59 struct kref ref;
60 u32 user_mapped;
61 bool own_mem_ref;
62 u32 pgsz_idx;
63 u32 ctag_offset;
64 u32 ctag_lines;
65 u32 ctag_allocated_lines;
66
67 /* For comptag mapping, these are the mapping window parameters */
68 bool ctags_mappable;
69 u64 ctag_map_win_addr; /* non-zero if mapped */
70 u64 ctag_map_win_size; /* non-zero if ctags_mappable */
71 u32 ctag_map_win_ctagline; /* ctagline at win start, set if
72 * ctags_mappable */
73
74 u32 flags;
75 u32 kind;
76 bool va_allocated;
77};
78
79static inline struct nvgpu_mapped_buf *
80nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node)
81{
82 return (struct nvgpu_mapped_buf *)
83 ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf,
84 buffer_list));
85}
86
87static inline struct nvgpu_mapped_buf *
88mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
89{
90 return (struct nvgpu_mapped_buf *)
91 ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));
92}
93
47struct vm_gk20a { 94struct vm_gk20a {
48 struct mm_gk20a *mm; 95 struct mm_gk20a *mm;
49 struct gk20a_as_share *as_share; /* as_share this represents */ 96 struct gk20a_as_share *as_share; /* as_share this represents */
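The two offsetof() helpers above are the hand-rolled equivalent of the kernel's container_of()/list_entry() idiom: given an embedded nvgpu_list_node or nvgpu_rbtree_node they recover the owning nvgpu_mapped_buf, and the list iterators used earlier in this patch (nvgpu_list_for_each_entry_safe with the struct type and member name as arguments) presumably resolve entries through them. A small illustrative walker built only from names introduced by this patch (the function itself is made up; a non-safe iterator would also do, since nothing is removed here):

static u64 example_reserved_va_mapped_bytes(struct vm_reserved_va_node *va_node)
{
        struct nvgpu_mapped_buf *buf, *tmp;
        u64 bytes = 0;

        /* Caller is assumed to hold vm->update_gmmu_lock. */
        nvgpu_list_for_each_entry_safe(buf, tmp, &va_node->buffer_list_head,
                                       nvgpu_mapped_buf, buffer_list) {
                bytes += buf->size;
        }

        return bytes;
}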
@@ -102,6 +149,8 @@ struct vm_gk20a {
102void nvgpu_vm_get(struct vm_gk20a *vm); 149void nvgpu_vm_get(struct vm_gk20a *vm);
103void nvgpu_vm_put(struct vm_gk20a *vm); 150void nvgpu_vm_put(struct vm_gk20a *vm);
104 151
152int vm_aspace_id(struct vm_gk20a *vm);
153
105/* batching eliminates redundant cache flushes and invalidates */ 154/* batching eliminates redundant cache flushes and invalidates */
106void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); 155void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
107void nvgpu_vm_mapping_batch_finish( 156void nvgpu_vm_mapping_batch_finish(
@@ -112,24 +161,45 @@ void nvgpu_vm_mapping_batch_finish_locked(
112 161
113/* get reference to all currently mapped buffers */ 162/* get reference to all currently mapped buffers */
114int nvgpu_vm_get_buffers(struct vm_gk20a *vm, 163int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
115 struct mapped_buffer_node ***mapped_buffers, 164 struct nvgpu_mapped_buf ***mapped_buffers,
116 int *num_buffers); 165 int *num_buffers);
117 166
118/* put references on the given buffers */ 167/* put references on the given buffers */
119void nvgpu_vm_put_buffers(struct vm_gk20a *vm, 168void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
120 struct mapped_buffer_node **mapped_buffers, 169 struct nvgpu_mapped_buf **mapped_buffers,
121 int num_buffers); 170 int num_buffers);
122 171
123/* Note: batch may be NULL if unmap op is not part of a batch */ 172/* Note: batch may be NULL if unmap op is not part of a batch */
124int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, 173int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
125 struct vm_gk20a_mapping_batch *batch); 174 struct vm_gk20a_mapping_batch *batch);
126 175
127void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, 176void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
128 struct vm_gk20a_mapping_batch *batch); 177 struct vm_gk20a_mapping_batch *batch);
129 178
179/*
180 * These all require the VM update lock to be held.
181 */
182struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
183 struct vm_gk20a *vm, u64 addr);
184struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
185 struct vm_gk20a *vm, u64 addr);
186struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
187 struct vm_gk20a *vm, u64 addr);
188
189int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
190 struct dma_buf **dmabuf,
191 u64 *offset);
192
193int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
194 struct nvgpu_mapped_buf *mapped_buffer);
195void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
196 struct nvgpu_mapped_buf *mapped_buffer);
197
130void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); 198void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm);
131void nvgpu_vm_remove_support(struct vm_gk20a *vm); 199void nvgpu_vm_remove_support(struct vm_gk20a *vm);
132 200
201void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
202
133int nvgpu_init_vm(struct mm_gk20a *mm, 203int nvgpu_init_vm(struct mm_gk20a *mm,
134 struct vm_gk20a *vm, 204 struct vm_gk20a *vm,
135 u32 big_page_size, 205 u32 big_page_size,
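Per the comment above, the lookup, insert, and remove helpers expect vm->update_gmmu_lock to be held. A minimal sketch of how a mapping path would be expected to bracket an insertion (the wrapper name is made up for illustration):

static int example_publish_mapping(struct vm_gk20a *vm,
                                   struct nvgpu_mapped_buf *mapped_buffer)
{
        int err;

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
        nvgpu_mutex_release(&vm->update_gmmu_lock);

        return err;
}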
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index cfa9e428..f4004f42 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -202,7 +202,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
202static void vgpu_vm_remove_support(struct vm_gk20a *vm) 202static void vgpu_vm_remove_support(struct vm_gk20a *vm)
203{ 203{
204 struct gk20a *g = vm->mm->g; 204 struct gk20a *g = vm->mm->g;
205 struct mapped_buffer_node *mapped_buffer; 205 struct nvgpu_mapped_buf *mapped_buffer;
206 struct vm_reserved_va_node *va_node, *va_node_tmp; 206 struct vm_reserved_va_node *va_node, *va_node_tmp;
207 struct tegra_vgpu_cmd_msg msg; 207 struct tegra_vgpu_cmd_msg msg;
208 struct tegra_vgpu_as_share_params *p = &msg.params.as_share; 208 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;