author    Alex Waterman <alexw@nvidia.com>  2017-04-24 18:26:00 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-05-19 18:34:06 -0400
commit    014ace5a85f274de7debb4c6168d69c803445e19 (patch)
tree      4028be3294b95e38659f1ebba4a14457748e59f1
parent    d37e8f7dcf190f31f9c0c12583db2bb0c0d313c0 (diff)
gpu: nvgpu: Split VM implementation out
This patch begins splitting the VM implementation out of mm_gk20a.c and
moves it to common/linux/vm.c and common/mm/vm.c. This split is
necessary because the VM code has two portions: first, an interface for
the OS-specific code to use (i.e., userspace mappings), and second, a
set of APIs for the driver to use (init, cleanup, etc.) which are not
OS-specific.

This is only the beginning of the split; there are still many things
that need to be carefully moved around (a sketch of the intended
layering follows the sign-offs below).
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: I3b57cba245d7daf9e4326a143b9c6217e0f28c96
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477743
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
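
[Editor's note] To picture the layering the commit message describes, here is a
small, self-contained sketch. None of it is nvgpu code; every name in it
(core_track, os_map, struct os_buffer) is a hypothetical stand-in for the
common/mm and common/linux halves of the split:

#include <stdio.h>
#include <stdlib.h>

/* "common/mm" side: OS-agnostic bookkeeping, no platform types. */
struct mapping {
        unsigned long long addr;
        unsigned long long size;
        struct mapping *next;
};

static struct mapping *mappings;

static int core_track(unsigned long long addr, unsigned long long size)
{
        struct mapping *m = malloc(sizeof(*m));

        if (!m)
                return -1;
        m->addr = addr;
        m->size = size;
        m->next = mappings;
        mappings = m;
        return 0;
}

/* "common/linux" side: unwrap the OS object, then call down into the core. */
struct os_buffer {              /* stand-in for, e.g., struct dma_buf */
        unsigned long long phys;
        unsigned long long len;
};

static int os_map(const struct os_buffer *buf)
{
        /* OS-specific work (pinning, SGL walking) would happen here. */
        return core_track(buf->phys, buf->len);
}

int main(void)
{
        struct os_buffer buf = { 0x100000, 0x2000 };

        if (os_map(&buf) == 0)
                printf("tracked mapping at 0x%llx\n", mappings->addr);
        return 0;
}

The patch itself has the same shape: common/linux/vm.c unwraps struct dma_buf
and scatterlists, then hands tracking over to nvgpu_insert_mapped_buf() in
common/mm/vm.c.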
Diffstat:
-rw-r--r--  drivers/gpu/nvgpu/Makefile.nvgpu            1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c       421
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm_priv.h   36
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c           65
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c         2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c     4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h     2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c     4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c        622
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h         52
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c          2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/vm.h       78
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c            2
13 files changed, 661 insertions, 630 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 0a60eece..2f9d1b36 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -39,6 +39,7 @@ nvgpu-y := \
 	common/linux/driver_common.o \
 	common/linux/firmware.o \
 	common/linux/thread.o \
+	common/linux/vm.o \
 	common/mm/nvgpu_allocator.o \
 	common/mm/bitmap_allocator.o \
 	common/mm/buddy_allocator.o \
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+
+#include <nvgpu/log.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/rbtree.h>
+#include <nvgpu/page_allocator.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#include "vm_priv.h"
+
+static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
+	struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_enum_start(0, &node, root);
+
+	while (node) {
+		struct nvgpu_mapped_buf *mapped_buffer =
+			mapped_buffer_from_rbtree_node(node);
+
+		if (mapped_buffer->dmabuf == dmabuf &&
+		    kind == mapped_buffer->kind)
+			return mapped_buffer;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return NULL;
+}
+
+/*
+ * Determine alignment for a passed buffer. Necessary since the buffer may
+ * appear big to map with large pages but the SGL may have chunks that are not
+ * aligned on a 64/128kB large page boundary.
+ */
+static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
+				      enum nvgpu_aperture aperture)
+{
+	u64 align = 0, chunk_align = 0;
+	u64 buf_addr;
+
+	if (aperture == APERTURE_VIDMEM) {
+		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
+		struct page_alloc_chunk *chunk = NULL;
+
+		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
+					  page_alloc_chunk, list_entry) {
+			chunk_align = 1ULL << __ffs(chunk->base |
+						    chunk->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+		}
+
+		return align;
+	}
+
+	buf_addr = (u64)sg_dma_address(sgl);
+
+	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+		while (sgl) {
+			buf_addr = (u64)sg_phys(sgl);
+			chunk_align = 1ULL << __ffs(buf_addr |
+						    (u64)sgl->length);
+
+			if (align)
+				align = min(align, chunk_align);
+			else
+				align = chunk_align;
+			sgl = sg_next(sgl);
+		}
+
+		return align;
+	}
+
+	align = 1ULL << __ffs(buf_addr);
+
+	return align;
+}
+
+/*
+ * vm->update_gmmu_lock must be held. This checks to see if we already have
+ * mapped the passed buffer into this VM. If so, just return the existing
+ * mapping address.
+ */
+static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+				   struct dma_buf *dmabuf,
+				   u64 offset_align,
+				   u32 flags,
+				   int kind,
+				   bool user_mapped,
+				   int rw_flag)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
+		if (!mapped_buffer)
+			return 0;
+
+		if (mapped_buffer->dmabuf != dmabuf ||
+		    mapped_buffer->kind != (u32)kind)
+			return 0;
+	} else {
+		mapped_buffer =
+			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
+		if (!mapped_buffer)
+			return 0;
+	}
+
+	if (mapped_buffer->flags != flags)
+		return 0;
+
+	/* mark the buffer as used */
+	if (user_mapped) {
+		if (mapped_buffer->user_mapped == 0)
+			vm->num_user_mapped_buffers++;
+		mapped_buffer->user_mapped++;
+
+		/* If the mapping comes from user space, we own
+		 * the handle ref. Since we reuse an
+		 * existing mapping here, we need to give back those
+		 * refs once in order not to leak.
+		 */
+		if (mapped_buffer->own_mem_ref)
+			dma_buf_put(mapped_buffer->dmabuf);
+		else
+			mapped_buffer->own_mem_ref = true;
+	}
+	kref_get(&mapped_buffer->ref);
+
+	nvgpu_log(g, gpu_dbg_map,
+		  "gv: 0x%04x_%08x + 0x%-7zu "
+		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+		  "flags=0x%x apt=%s (reused)",
+		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
+		  dmabuf->size,
+		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
+		  vm_aspace_id(vm),
+		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
+		  mapped_buffer->flags,
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
+
+	return mapped_buffer->addr;
+}
+
+u64 nvgpu_vm_map(struct vm_gk20a *vm,
+		 struct dma_buf *dmabuf,
+		 u64 offset_align,
+		 u32 flags,
+		 int kind,
+		 bool user_mapped,
+		 int rw_flag,
+		 u64 buffer_offset,
+		 u64 mapping_size,
+		 struct vm_gk20a_mapping_batch *batch)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	bool inserted = false, va_allocated = false;
+	u64 map_offset = 0;
+	int err = 0;
+	struct buffer_attrs bfr = {NULL};
+	struct gk20a_comptags comptags;
+	bool clear_ctags = false;
+	struct scatterlist *sgl;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
+	struct vm_reserved_va_node *va_node = NULL;
+	u32 ctag_offset;
+	enum nvgpu_aperture aperture;
+
+	if (user_mapped && vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "non-fixed-offset mapping not available on "
+			  "userspace managed address spaces");
+		return -EFAULT;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	/* check if this buffer is already mapped */
+	if (!vm->userspace_managed) {
+		map_offset = __nvgpu_vm_find_mapping(
+			vm, dmabuf, offset_align,
+			flags, kind,
+			user_mapped, rw_flag);
+		if (map_offset) {
+			nvgpu_mutex_release(&vm->update_gmmu_lock);
+			return map_offset;
+		}
+	}
+
+	/* pin buffer to get phys/iovmm addr */
+	bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
+	if (IS_ERR(bfr.sgt)) {
+		/* Falling back to physical is actually possible
+		 * here in many cases if we use 4K phys pages in the
+		 * gmmu. However we have some regions which require
+		 * contig regions to work properly (either phys-contig
+		 * or contig through smmu io_vaspace). Until we can
+		 * track the difference between those two cases we have
+		 * to fail the mapping when we run out of SMMU space.
+		 */
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+
+	bfr.kind_v = kind;
+	bfr.size = dmabuf->size;
+	sgl = bfr.sgt->sgl;
+
+	aperture = gk20a_dmabuf_aperture(g, dmabuf);
+	if (aperture == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto clean_up;
+	}
+
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
+		map_offset = offset_align;
+
+	bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
+	bfr.pgsz_idx = __get_pte_size(vm, map_offset,
+				      min_t(u64, bfr.size, bfr.align));
+	mapping_size = mapping_size ? mapping_size : bfr.size;
+
+	/* Check if we should use a fixed offset for mapping this buffer */
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		err = validate_fixed_buffer(vm, &bfr,
+					    offset_align, mapping_size,
+					    &va_node);
+		if (err)
+			goto clean_up;
+
+		map_offset = offset_align;
+		va_allocated = false;
+	} else
+		va_allocated = true;
+
+	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
+	if (unlikely(err)) {
+		nvgpu_err(g, "failure setting up kind and compression");
+		goto clean_up;
+	}
+
+	/* bar1 and pmu vm don't need ctag */
+	if (!vm->enable_ctag)
+		bfr.ctag_lines = 0;
+
+	gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+	/* ensure alignment to compression page size if compression enabled */
+	if (bfr.ctag_offset)
+		mapping_size = ALIGN(mapping_size,
+				     g->ops.fb.compression_page_size(g));
+
+	if (bfr.ctag_lines && !comptags.lines) {
+		const bool user_mappable =
+			!!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
+		/* allocate compression resources if needed */
+		err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
+					   bfr.ctag_lines, user_mappable,
+					   &ctag_map_win_size,
+					   &ctag_map_win_ctagline);
+		if (err) {
+			/* ok to fall back here if we ran out */
+			/* TBD: we can partially alloc ctags as well... */
+			bfr.kind_v = bfr.uc_kind_v;
+		} else {
+			gk20a_get_comptags(g->dev, dmabuf, &comptags);
+
+			if (g->ops.ltc.cbc_ctrl)
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    comptags.offset,
+						    comptags.offset +
+						    comptags.allocated_lines - 1);
+			else
+				clear_ctags = true;
+		}
+	}
+
+	/* store the comptag info */
+	bfr.ctag_offset = comptags.offset;
+	bfr.ctag_lines = comptags.lines;
+	bfr.ctag_allocated_lines = comptags.allocated_lines;
+	bfr.ctag_user_mappable = comptags.user_mappable;
+
+	/*
+	 * Calculate comptag index for this mapping. Differs in
+	 * case of partial mapping.
+	 */
+	ctag_offset = comptags.offset;
+	if (ctag_offset)
+		ctag_offset += buffer_offset >>
+			       ilog2(g->ops.fb.compression_page_size(g));
+
+	/* update gmmu ptes */
+	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
+					bfr.sgt,
+					buffer_offset, /* sg offset */
+					mapping_size,
+					bfr.pgsz_idx,
+					bfr.kind_v,
+					ctag_offset,
+					flags, rw_flag,
+					clear_ctags,
+					false,
+					false,
+					batch,
+					aperture);
+	if (!map_offset)
+		goto clean_up;
+
+	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+	if (!mapped_buffer) {
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		goto clean_up;
+	}
+	mapped_buffer->dmabuf = dmabuf;
+	mapped_buffer->sgt = bfr.sgt;
+	mapped_buffer->addr = map_offset;
+	mapped_buffer->size = mapping_size;
+	mapped_buffer->pgsz_idx = bfr.pgsz_idx;
+	mapped_buffer->ctag_offset = bfr.ctag_offset;
+	mapped_buffer->ctag_lines = bfr.ctag_lines;
+	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
+	mapped_buffer->vm = vm;
+	mapped_buffer->flags = flags;
+	mapped_buffer->kind = kind;
+	mapped_buffer->va_allocated = va_allocated;
+	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
+	mapped_buffer->own_mem_ref = user_mapped;
+	nvgpu_init_list_node(&mapped_buffer->buffer_list);
+	kref_init(&mapped_buffer->ref);
+
+	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+	if (err) {
+		nvgpu_err(g, "failed to insert into mapped buffer tree");
+		goto clean_up;
+	}
+	inserted = true;
+	if (user_mapped)
+		vm->num_user_mapped_buffers++;
+
+	if (va_node) {
+		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+				    &va_node->buffer_list_head);
+		mapped_buffer->va_node = va_node;
+	}
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return map_offset;
+
+clean_up:
+	if (inserted) {
+		nvgpu_remove_mapped_buf(vm, mapped_buffer);
+		if (user_mapped)
+			vm->num_user_mapped_buffers--;
+	}
+	nvgpu_kfree(g, mapped_buffer);
+	if (va_allocated)
+		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+	if (!IS_ERR(bfr.sgt))
+		gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	nvgpu_log_info(g, "err=%d\n", err);
+	return 0;
+}
+
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_mapped_buf *mapped_buffer;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
+	if (!mapped_buffer) {
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
+		return;
+	}
+
+	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
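
[Editor's note] The alignment helper added above reduces each SGL chunk to
1ULL << __ffs(base | length), the largest power of two that divides both the
chunk's start address and its length, and then takes the minimum across
chunks. A standalone check of that per-chunk rule; this is not nvgpu code, and
__builtin_ctzll stands in for the kernel's __ffs:

#include <stdio.h>

static unsigned long long chunk_align(unsigned long long base,
                                      unsigned long long length)
{
        /* Lowest set bit of (base | length) bounds the usable page size. */
        return 1ULL << __builtin_ctzll(base | length);
}

int main(void)
{
        /* 0x140000/0x20000: both multiples of 128kB, so large pages fit. */
        printf("align = 0x%llx\n", chunk_align(0x140000, 0x20000));

        /* A 4kB-sized tail drags the guaranteed alignment down to 4kB. */
        printf("align = 0x%llx\n", chunk_align(0x140000, 0x1000));
        return 0;
}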
diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h
index c0fb0ffe..9e064d76 100644
--- a/drivers/gpu/nvgpu/common/linux/vm_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h
@@ -25,12 +25,24 @@ struct dma_buf;
 struct vm_gk20a;
 struct vm_gk20a_mapping_batch;
 
+struct buffer_attrs {
+	struct sg_table *sgt;
+	u64 size;
+	u64 align;
+	u32 ctag_offset;
+	u32 ctag_lines;
+	u32 ctag_allocated_lines;
+	int pgsz_idx;
+	u8 kind_v;
+	u8 uc_kind_v;
+	bool ctag_user_mappable;
+};
+
 u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		 struct dma_buf *dmabuf,
 		 u64 offset_align,
-		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
+		 u32 flags,
 		 int kind,
-		 struct sg_table **sgt,
 		 bool user_mapped,
 		 int rw_flag,
 		 u64 buffer_offset,
@@ -59,4 +71,24 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
 int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 			 struct dma_buf **dmabuf,
 			 u64 *offset);
+
+enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
+					  struct dma_buf *dmabuf);
+int validate_fixed_buffer(struct vm_gk20a *vm,
+			  struct buffer_attrs *bfr,
+			  u64 map_offset, u64 map_size,
+			  struct vm_reserved_va_node **pva_node);
+int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+				      u32 flags,
+				      struct buffer_attrs *bfr,
+				      enum gmmu_pgsz_gk20a pgsz_idx);
+int gk20a_alloc_comptags(struct gk20a *g,
+			 struct device *dev,
+			 struct dma_buf *dmabuf,
+			 struct gk20a_comptag_allocator *allocator,
+			 u32 lines, bool user_mappable,
+			 u64 *ctag_map_win_size,
+			 u32 *ctag_map_win_ctagline);
+void gk20a_vm_unmap_locked_kref(struct kref *ref);
+
 #endif
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index eaf30fd0..635ac0fb 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -23,6 +23,11 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+int vm_aspace_id(struct vm_gk20a *vm)
+{
+	return vm->as_share ? vm->as_share->id : -1;
+}
+
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
 {
 	memset(mapping_batch, 0, sizeof(*mapping_batch));
@@ -52,7 +57,7 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
 
 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;
@@ -118,7 +123,7 @@ void nvgpu_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
 }
 
-void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
 {
 	struct gk20a *g = vm->mm->g;
 
@@ -127,3 +132,59 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
 	gk20a_free_inst_block(g, inst_block);
 	nvgpu_vm_remove_support_nofree(vm);
 }
+
+int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
+			    struct nvgpu_mapped_buf *mapped_buffer)
+{
+	mapped_buffer->node.key_start = mapped_buffer->addr;
+	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
+
+	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
+
+	return 0;
+}
+
+void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
+			     struct nvgpu_mapped_buf *mapped_buffer)
+{
+	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
+}
+
+struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
+	struct vm_gk20a *vm, u64 addr)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
+}
+
+struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
+	struct vm_gk20a *vm, u64 addr)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_range_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
+}
+
+struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
+	struct vm_gk20a *vm, u64 addr)
+{
+	struct nvgpu_rbtree_node *node = NULL;
+	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+
+	nvgpu_rbtree_less_than_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
+}
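
[Editor's note] The tracking that now lives in common/mm/vm.c keys each buffer
on the interval [addr, addr + size) and offers three lookups: exact start
address, the mapping containing a given address, and the highest mapping
starting below a given address (what validate_fixed_buffer() uses to probe for
overlap). A minimal stand-in for those queries; this is hypothetical code,
with a linear scan over a sorted array replacing the nvgpu rbtree:

#include <stdio.h>

struct buf {
        unsigned long long start;
        unsigned long long end;         /* exclusive */
};

/* Sorted by start address, as rbtree enumeration would yield. */
static const struct buf bufs[] = {
        { 0x1000, 0x3000 },
        { 0x8000, 0x9000 },
};
#define NBUFS (sizeof(bufs) / sizeof(bufs[0]))

/* Which mapping contains addr? (range search) */
static const struct buf *find_range(unsigned long long addr)
{
        for (unsigned int i = 0; i < NBUFS; i++)
                if (addr >= bufs[i].start && addr < bufs[i].end)
                        return &bufs[i];
        return NULL;
}

/* Highest mapping starting below addr (less-than search). */
static const struct buf *find_less_than(unsigned long long addr)
{
        const struct buf *best = NULL;

        for (unsigned int i = 0; i < NBUFS; i++)
                if (bufs[i].start < addr)
                        best = &bufs[i];
        return best;
}

int main(void)
{
        printf("0x2000 is inside [0x%llx, ...)\n", find_range(0x2000)->start);
        printf("highest start below 0x9000: 0x%llx\n",
               find_less_than(0x9000)->start);
        return 0;
}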
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index cf95019b..d2bb3ee9 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1025,7 +1025,7 @@ __releases(&cde_app->mutex)
 	get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
 	map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				 compbits_kind, NULL, true,
+				 compbits_kind, true,
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
 				 NULL);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index b7fb363e..9e3bc05e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1406,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 		int err;
 
 		words = pbdma_gp_entry1_length_v(g->entry1);
-		err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
+		err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
 		if (!err)
 			mem = dma_buf_vmap(dmabuf);
 	}
@@ -1903,7 +1903,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 				 bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
-	struct mapped_buffer_node **mapped_buffers = NULL;
+	struct nvgpu_mapped_buf **mapped_buffers = NULL;
 	int err = 0, num_mapped_buffers = 0;
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 1cbf7689..bbc1a72a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -60,7 +60,7 @@ struct channel_ctx_gk20a {
 };
 
 struct channel_gk20a_job {
-	struct mapped_buffer_node **mapped_buffers;
+	struct nvgpu_mapped_buf **mapped_buffers;
 	int num_mapped_buffers;
 	struct gk20a_fence *pre_fence;
 	struct gk20a_fence *post_fence;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 5351750a..54317195 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
 	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
 err_remove_vm:
-	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	err = gk20a_perfbuf_disable_locked(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
 
 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 72a3ee13..84919d50 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -25,6 +25,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/vm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
@@ -121,25 +122,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static inline int vm_aspace_id(struct vm_gk20a *vm)
-{
-	/* -1 is bar1 or pmu, etc. */
-	return vm->as_share ? vm->as_share->id : -1;
-}
-static inline u32 hi32(u64 f)
-{
-	return (u32)(f >> 32);
-}
-static inline u32 lo32(u64 f)
-{
-	return (u32)(f & 0xffffffff);
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_locked(
-	struct nvgpu_rbtree_node *root, u64 addr);
-static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-	struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
-	u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   enum gmmu_pgsz_gk20a pgsz_idx,
 				   struct sg_table *sgt, u64 buffer_offset,
@@ -316,13 +298,13 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 	*comptags = priv->comptags;
 }
 
-static int gk20a_alloc_comptags(struct gk20a *g,
-				struct device *dev,
-				struct dma_buf *dmabuf,
-				struct gk20a_comptag_allocator *allocator,
-				u32 lines, bool user_mappable,
-				u64 *ctag_map_win_size,
-				u32 *ctag_map_win_ctagline)
+int gk20a_alloc_comptags(struct gk20a *g,
+			 struct device *dev,
+			 struct dma_buf *dmabuf,
+			 struct gk20a_comptag_allocator *allocator,
+			 u32 lines, bool user_mappable,
+			 u64 *ctag_map_win_size,
+			 u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 ctaglines_allocsize;
@@ -493,9 +475,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 		g->ops.mm.remove_bar2_vm(g);
 
 	if (g->ops.mm.is_bar1_supported(g))
-		gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+		nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
 
-	gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
+	nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
 	nvgpu_vm_remove_support_nofree(&mm->cde.vm);
 
@@ -1097,11 +1079,11 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
 }
 
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
-			 struct mapped_buffer_node ***mapped_buffers,
+			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
 {
-	struct mapped_buffer_node *mapped_buffer;
-	struct mapped_buffer_node **buffer_list;
+	struct nvgpu_mapped_buf *mapped_buffer;
+	struct nvgpu_mapped_buf **buffer_list;
 	struct nvgpu_rbtree_node *node = NULL;
 	int i = 0;
 
@@ -1141,15 +1123,15 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 	return 0;
 }
 
-static void gk20a_vm_unmap_locked_kref(struct kref *ref)
+void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
-	struct mapped_buffer_node *mapped_buffer =
-		container_of(ref, struct mapped_buffer_node, ref);
+	struct nvgpu_mapped_buf *mapped_buffer =
+		container_of(ref, struct nvgpu_mapped_buf, ref);
 	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
 }
 
 void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
-			  struct mapped_buffer_node **mapped_buffers,
+			  struct nvgpu_mapped_buf **mapped_buffers,
 			  int num_buffers)
 {
 	int i;
@@ -1177,11 +1159,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 			       struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = vm->mm->g;
-	struct mapped_buffer_node *mapped_buffer;
+	struct nvgpu_mapped_buf *mapped_buffer;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
+	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
@@ -1273,100 +1255,10 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 	return 0;
 }
 
-
-static void remove_mapped_buffer(struct vm_gk20a *vm,
-				 struct mapped_buffer_node *mapped_buffer)
-{
-	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
-}
-
-static int insert_mapped_buffer(struct vm_gk20a *vm,
-				struct mapped_buffer_node *mapped_buffer)
-{
-	mapped_buffer->node.key_start = mapped_buffer->addr;
-	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
-
-	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
-
-	return 0;
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-	struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
-	u32 kind)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_enum_start(0, &node, root);
-
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			mapped_buffer_from_rbtree_node(node);
-
-		if (mapped_buffer->dmabuf == dmabuf &&
-		    kind == mapped_buffer->kind)
-			return mapped_buffer;
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	return NULL;
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_locked(
-	struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-static struct mapped_buffer_node *find_mapped_buffer_range_locked(
-	struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_range_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-/* find the first mapped buffer with GPU VA less than addr */
-static struct mapped_buffer_node *find_mapped_buffer_less_than_locked(
-	struct nvgpu_rbtree_node *root, u64 addr)
-{
-	struct nvgpu_rbtree_node *node = NULL;
-
-	nvgpu_rbtree_less_than_search(addr, &node, root);
-	if (!node)
-		return NULL;
-
-	return mapped_buffer_from_rbtree_node(node);
-}
-
-struct buffer_attrs {
-	struct sg_table *sgt;
-	u64 size;
-	u64 align;
-	u32 ctag_offset;
-	u32 ctag_lines;
-	u32 ctag_allocated_lines;
-	int pgsz_idx;
-	u8 kind_v;
-	u8 uc_kind_v;
-	bool ctag_user_mappable;
-};
-
-static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
-					     u32 flags,
-					     struct buffer_attrs *bfr,
-					     enum gmmu_pgsz_gk20a pgsz_idx)
+int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
+				      u32 flags,
+				      struct buffer_attrs *bfr,
+				      enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	bool kind_compressible;
 	struct gk20a *g = gk20a_from_vm(vm);
@@ -1409,14 +1301,14 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int validate_fixed_buffer(struct vm_gk20a *vm,
-				 struct buffer_attrs *bfr,
-				 u64 map_offset, u64 map_size,
-				 struct vm_reserved_va_node **pva_node)
+int validate_fixed_buffer(struct vm_gk20a *vm,
+			  struct buffer_attrs *bfr,
+			  u64 map_offset, u64 map_size,
+			  struct vm_reserved_va_node **pva_node)
 {
 	struct gk20a *g = vm->mm->g;
 	struct vm_reserved_va_node *va_node;
-	struct mapped_buffer_node *buffer;
+	struct nvgpu_mapped_buf *buffer;
 	u64 map_end = map_offset + map_size;
 
 	/* can wrap around with insane map_size; zero is disallowed too */
@@ -1448,8 +1340,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	/* check that this mapping does not collide with existing
 	 * mappings by checking the buffer with the highest GPU VA
 	 * that is less than our buffer end */
-	buffer = find_mapped_buffer_less_than_locked(
-		vm->mapped_buffers, map_offset + map_size);
+	buffer = __nvgpu_vm_find_mapped_buf_less_than(
+		vm, map_offset + map_size);
 	if (buffer && buffer->addr + buffer->size > map_offset) {
 		nvgpu_warn(g, "overlapping buffer map requested");
 		return -EINVAL;
@@ -1499,11 +1391,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		"[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
 		"pgsz=%-3dKb as=%-2d ctags=%d start=%d "
 		"kind=0x%x flags=0x%x apt=%s",
-		hi32(map_offset), lo32(map_offset), size,
-		sgt ? hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-		sgt ? lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-		sgt ? hi32((u64)sg_phys(sgt->sgl)) : 0,
-		sgt ? lo32((u64)sg_phys(sgt->sgl)) : 0,
+		u64_hi32(map_offset), u64_lo32(map_offset), size,
+		sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
+		sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
+		sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
+		sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
 		vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
 		ctag_lines, ctag_offset,
 		kind_v, flags, nvgpu_aperture_str(aperture));
@@ -1595,8 +1487,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	}
 }
 
-static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
-						 struct dma_buf *dmabuf)
+enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
+					  struct dma_buf *dmabuf)
 {
 	struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf);
 	if (buf_owner == NULL) {
@@ -1617,80 +1509,6 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
 	}
 }
 
-static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm,
-					 struct dma_buf *dmabuf,
-					 u64 offset_align,
-					 u32 flags,
-					 int kind,
-					 struct sg_table **sgt,
-					 bool user_mapped,
-					 int rw_flag)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct mapped_buffer_node *mapped_buffer = NULL;
-
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers,
-							  offset_align);
-		if (!mapped_buffer)
-			return 0;
-
-		if (mapped_buffer->dmabuf != dmabuf ||
-		    mapped_buffer->kind != (u32)kind)
-			return 0;
-	} else {
-		mapped_buffer =
-			find_mapped_buffer_reverse_locked(vm->mapped_buffers,
-							  dmabuf, kind);
-		if (!mapped_buffer)
-			return 0;
-	}
-
-	if (mapped_buffer->flags != flags)
-		return 0;
-
-	BUG_ON(mapped_buffer->vm != vm);
-
-	/* mark the buffer as used */
-	if (user_mapped) {
-		if (mapped_buffer->user_mapped == 0)
-			vm->num_user_mapped_buffers++;
-		mapped_buffer->user_mapped++;
-
-		/* If the mapping comes from user space, we own
-		 * the handle ref. Since we reuse an
-		 * existing mapping here, we need to give back those
-		 * refs once in order not to leak.
-		 */
-		if (mapped_buffer->own_mem_ref)
-			dma_buf_put(mapped_buffer->dmabuf);
-		else
-			mapped_buffer->own_mem_ref = true;
-	}
-	kref_get(&mapped_buffer->ref);
-
-	gk20a_dbg(gpu_dbg_map,
-		  "gv: 0x%04x_%08x + 0x%-7zu "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-		  "flags=0x%x apt=%s (reused)",
-		  hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
-		  dmabuf->size,
-		  hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
-		  vm_aspace_id(vm),
-		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
-		  mapped_buffer->flags,
-		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
-
-	if (sgt)
-		*sgt = mapped_buffer->sgt;
-	return mapped_buffer->addr;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static struct sg_table *gk20a_vidbuf_map_dma_buf(
 	struct dma_buf_attachment *attach, enum dma_data_direction dir)
@@ -1919,310 +1737,6 @@ int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | |||
1919 | #endif | 1737 | #endif |
1920 | } | 1738 | } |
1921 | 1739 | ||
1922 | static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, | ||
1923 | enum nvgpu_aperture aperture) | ||
1924 | { | ||
1925 | u64 align = 0, chunk_align = 0; | ||
1926 | u64 buf_addr; | ||
1927 | |||
1928 | if (aperture == APERTURE_VIDMEM) { | ||
1929 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); | ||
1930 | struct page_alloc_chunk *chunk = NULL; | ||
1931 | |||
1932 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | ||
1933 | page_alloc_chunk, list_entry) { | ||
1934 | chunk_align = 1ULL << __ffs(chunk->base | chunk->length); | ||
1935 | |||
1936 | if (align) | ||
1937 | align = min(align, chunk_align); | ||
1938 | else | ||
1939 | align = chunk_align; | ||
1940 | } | ||
1941 | |||
1942 | return align; | ||
1943 | } | ||
1944 | |||
1945 | buf_addr = (u64)sg_dma_address(sgl); | ||
1946 | |||
1947 | if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { | ||
1948 | while (sgl) { | ||
1949 | buf_addr = (u64)sg_phys(sgl); | ||
1950 | chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length); | ||
1951 | |||
1952 | if (align) | ||
1953 | align = min(align, chunk_align); | ||
1954 | else | ||
1955 | align = chunk_align; | ||
1956 | sgl = sg_next(sgl); | ||
1957 | } | ||
1958 | |||
1959 | return align; | ||
1960 | } | ||
1961 | |||
1962 | align = 1ULL << __ffs(buf_addr); | ||
1963 | |||
1964 | return align; | ||
1965 | } | ||
1966 | |||
1967 | u64 nvgpu_vm_map(struct vm_gk20a *vm, | ||
1968 | struct dma_buf *dmabuf, | ||
1969 | u64 offset_align, | ||
1970 | u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, | ||
1971 | int kind, | ||
1972 | struct sg_table **sgt, | ||
1973 | bool user_mapped, | ||
1974 | int rw_flag, | ||
1975 | u64 buffer_offset, | ||
1976 | u64 mapping_size, | ||
1977 | struct vm_gk20a_mapping_batch *batch) | ||
1978 | { | ||
1979 | struct gk20a *g = gk20a_from_vm(vm); | ||
1980 | struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags; | ||
1981 | struct device *d = dev_from_vm(vm); | ||
1982 | struct mapped_buffer_node *mapped_buffer = NULL; | ||
1983 | bool inserted = false, va_allocated = false; | ||
1984 | u64 map_offset = 0; | ||
1985 | int err = 0; | ||
1986 | struct buffer_attrs bfr = {NULL}; | ||
1987 | struct gk20a_comptags comptags; | ||
1988 | bool clear_ctags = false; | ||
1989 | struct scatterlist *sgl; | ||
1990 | u64 ctag_map_win_size = 0; | ||
1991 | u32 ctag_map_win_ctagline = 0; | ||
1992 | struct vm_reserved_va_node *va_node = NULL; | ||
1993 | u32 ctag_offset; | ||
1994 | enum nvgpu_aperture aperture; | ||
1995 | |||
1996 | if (user_mapped && vm->userspace_managed && | ||
1997 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | ||
1998 | nvgpu_err(g, | ||
1999 | "%s: non-fixed-offset mapping not available on userspace managed address spaces", | ||
2000 | __func__); | ||
2001 | return -EFAULT; | ||
2002 | } | ||
2003 | |||
2004 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
2005 | |||
2006 | /* check if this buffer is already mapped */ | ||
2007 | if (!vm->userspace_managed) { | ||
2008 | map_offset = nvgpu_vm_map_duplicate_locked( | ||
2009 | vm, dmabuf, offset_align, | ||
2010 | flags, kind, sgt, | ||
2011 | user_mapped, rw_flag); | ||
2012 | if (map_offset) { | ||
2013 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
2014 | return map_offset; | ||
2015 | } | ||
2016 | } | ||
2017 | |||
2018 | /* pin buffer to get phys/iovmm addr */ | ||
2019 | bfr.sgt = gk20a_mm_pin(d, dmabuf); | ||
2020 | if (IS_ERR(bfr.sgt)) { | ||
2021 | /* Falling back to physical is actually possible | ||
2022 | * here in many cases if we use 4K phys pages in the | ||
2023 | * gmmu. However we have some regions which require | ||
2024 | * contig regions to work properly (either phys-contig | ||
2025 | * or contig through smmu io_vaspace). Until we can | ||
2026 | * track the difference between those two cases we have | ||
2027 | * to fail the mapping when we run out of SMMU space. | ||
2028 | */ | ||
2029 | nvgpu_warn(g, "oom allocating tracking buffer"); | ||
2030 | goto clean_up; | ||
2031 | } | ||
2032 | |||
2033 | if (sgt) | ||
2034 | *sgt = bfr.sgt; | ||
2035 | |||
2036 | bfr.kind_v = kind; | ||
2037 | bfr.size = dmabuf->size; | ||
2038 | sgl = bfr.sgt->sgl; | ||
2039 | |||
2040 | aperture = gk20a_dmabuf_aperture(g, dmabuf); | ||
2041 | if (aperture == APERTURE_INVALID) { | ||
2042 | err = -EINVAL; | ||
2043 | goto clean_up; | ||
2044 | } | ||
2045 | |||
2046 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) | ||
2047 | map_offset = offset_align; | ||
2048 | |||
2049 | bfr.align = gk20a_mm_get_align(g, sgl, aperture); | ||
2050 | bfr.pgsz_idx = __get_pte_size(vm, map_offset, | ||
2051 | min_t(u64, bfr.size, bfr.align)); | ||
2052 | mapping_size = mapping_size ? mapping_size : bfr.size; | ||
2053 | |||
2054 | /* Check if we should use a fixed offset for mapping this buffer */ | ||
2055 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | ||
2056 | err = validate_fixed_buffer(vm, &bfr, | ||
2057 | offset_align, mapping_size, | ||
2058 | &va_node); | ||
2059 | if (err) | ||
2060 | goto clean_up; | ||
2061 | |||
2062 | map_offset = offset_align; | ||
2063 | va_allocated = false; | ||
2064 | } else | ||
2065 | va_allocated = true; | ||
2066 | |||
2067 | if (sgt) | ||
2068 | *sgt = bfr.sgt; | ||
2069 | |||
2070 | err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx); | ||
2071 | if (unlikely(err)) { | ||
2072 | nvgpu_err(g, "failure setting up kind and compression"); | ||
2073 | goto clean_up; | ||
2074 | } | ||
2075 | |||
2076 | /* bar1 and pmu vm don't need ctag */ | ||
2077 | if (!vm->enable_ctag) | ||
2078 | bfr.ctag_lines = 0; | ||
2079 | |||
2080 | gk20a_get_comptags(d, dmabuf, &comptags); | ||
2081 | |||
2082 | /* ensure alignment to compression page size if compression enabled */ | ||
2083 | if (bfr.ctag_offset) | ||
2084 | mapping_size = ALIGN(mapping_size, | ||
2085 | g->ops.fb.compression_page_size(g)); | ||
2086 | |||
2087 | if (bfr.ctag_lines && !comptags.lines) { | ||
2088 | const bool user_mappable = | ||
2089 | !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS); | ||
2090 | |||
2091 | /* allocate compression resources if needed */ | ||
2092 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, | ||
2093 | bfr.ctag_lines, user_mappable, | ||
2094 | &ctag_map_win_size, | ||
2095 | &ctag_map_win_ctagline); | ||
2096 | if (err) { | ||
2097 | /* ok to fall back here if we ran out */ | ||
2098 | /* TBD: we can partially alloc ctags as well... */ | ||
2099 | bfr.kind_v = bfr.uc_kind_v; | ||
2100 | } else { | ||
2101 | gk20a_get_comptags(d, dmabuf, &comptags); | ||
2102 | |||
2103 | if (g->ops.ltc.cbc_ctrl) | ||
2104 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | ||
2105 | comptags.offset, | ||
2106 | comptags.offset + | ||
2107 | comptags.allocated_lines - 1); | ||
2108 | else | ||
2109 | clear_ctags = true; | ||
2110 | } | ||
2111 | } | ||
2112 | |||
2113 | /* store the comptag info */ | ||
2114 | bfr.ctag_offset = comptags.offset; | ||
2115 | bfr.ctag_lines = comptags.lines; | ||
2116 | bfr.ctag_allocated_lines = comptags.allocated_lines; | ||
2117 | bfr.ctag_user_mappable = comptags.user_mappable; | ||
2118 | |||
2119 | /* | ||
2120 | * Calculate comptag index for this mapping. Differs in | ||
2121 | * case of partial mapping. | ||
2122 | */ | ||
2123 | ctag_offset = comptags.offset; | ||
2124 | if (ctag_offset) | ||
2125 | ctag_offset += buffer_offset >> | ||
2126 | ilog2(g->ops.fb.compression_page_size(g)); | ||
2127 | |||
2128 | /* update gmmu ptes */ | ||
2129 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, | ||
2130 | bfr.sgt, | ||
2131 | buffer_offset, /* sg offset */ | ||
2132 | mapping_size, | ||
2133 | bfr.pgsz_idx, | ||
2134 | bfr.kind_v, | ||
2135 | ctag_offset, | ||
2136 | flags, rw_flag, | ||
2137 | clear_ctags, | ||
2138 | false, | ||
2139 | false, | ||
2140 | batch, | ||
2141 | aperture); | ||
2142 | if (!map_offset) | ||
2143 | goto clean_up; | ||
2144 | |||
2145 | #if defined(NVHOST_DEBUG) | ||
2146 | { | ||
2147 | int i; | ||
2148 | struct scatterlist *sg = NULL; | ||
2149 | gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)"); | ||
2150 | for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) { | ||
2151 | u64 da = sg_dma_address(sg); | ||
2152 | u64 pa = sg_phys(sg); | ||
2153 | u64 len = sg->length; | ||
2154 | gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x", | ||
2155 | i, hi32(pa), lo32(pa), hi32(da), lo32(da), | ||
2156 | hi32(len), lo32(len)); | ||
2157 | } | ||
2158 | } | ||
2159 | #endif | ||
2160 | |||
2161 | /* keep track of the buffer for unmapping */ | ||
2162 | /* TBD: check for multiple mapping of same buffer */ | ||
2163 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); | ||
2164 | if (!mapped_buffer) { | ||
2165 | nvgpu_warn(g, "oom allocating tracking buffer"); | ||
2166 | goto clean_up; | ||
2167 | } | ||
2168 | mapped_buffer->dmabuf = dmabuf; | ||
2169 | mapped_buffer->sgt = bfr.sgt; | ||
2170 | mapped_buffer->addr = map_offset; | ||
2171 | mapped_buffer->size = mapping_size; | ||
2172 | mapped_buffer->pgsz_idx = bfr.pgsz_idx; | ||
2173 | mapped_buffer->ctag_offset = bfr.ctag_offset; | ||
2174 | mapped_buffer->ctag_lines = bfr.ctag_lines; | ||
2175 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; | ||
2176 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; | ||
2177 | mapped_buffer->ctag_map_win_size = ctag_map_win_size; | ||
2178 | mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline; | ||
2179 | mapped_buffer->vm = vm; | ||
2180 | mapped_buffer->flags = flags; | ||
2181 | mapped_buffer->kind = kind; | ||
2182 | mapped_buffer->va_allocated = va_allocated; | ||
2183 | mapped_buffer->user_mapped = user_mapped ? 1 : 0; | ||
2184 | mapped_buffer->own_mem_ref = user_mapped; | ||
2185 | nvgpu_init_list_node(&mapped_buffer->va_buffers_list); | ||
2186 | kref_init(&mapped_buffer->ref); | ||
2187 | |||
2188 | err = insert_mapped_buffer(vm, mapped_buffer); | ||
2189 | if (err) { | ||
2190 | nvgpu_err(g, "failed to insert into mapped buffer tree"); | ||
2191 | goto clean_up; | ||
2192 | } | ||
2193 | inserted = true; | ||
2194 | if (user_mapped) | ||
2195 | vm->num_user_mapped_buffers++; | ||
2196 | |||
2197 | gk20a_dbg_info("allocated va @ 0x%llx", map_offset); | ||
2198 | |||
2199 | if (va_node) { | ||
2200 | nvgpu_list_add_tail(&mapped_buffer->va_buffers_list, | ||
2201 | &va_node->va_buffers_list); | ||
2202 | mapped_buffer->va_node = va_node; | ||
2203 | } | ||
2204 | |||
2205 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
2206 | |||
2207 | return map_offset; | ||
2208 | |||
2209 | clean_up: | ||
2210 | if (inserted) { | ||
2211 | remove_mapped_buffer(vm, mapped_buffer); | ||
2212 | if (user_mapped) | ||
2213 | vm->num_user_mapped_buffers--; | ||
2214 | } | ||
2215 | nvgpu_kfree(g, mapped_buffer); | ||
2216 | if (va_allocated) | ||
2217 | gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx); | ||
2218 | if (!IS_ERR(bfr.sgt)) | ||
2219 | gk20a_mm_unpin(d, dmabuf, bfr.sgt); | ||
2220 | |||
2221 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
2222 | gk20a_dbg_info("err=%d\n", err); | ||
2223 | return 0; | ||
2224 | } | ||
2225 | |||
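The clean_up path above is the kernel's single-label unwind idiom: one exit point, with guards (inserted, va_allocated, IS_ERR) so each resource is released only if its setup step actually ran, in reverse order. Note that it calls nvgpu_kfree() with a possibly-NULL mapped_buffer, so it relies on the free being a no-op for NULL, like kfree(). A minimal user-space sketch of the same shape; insert_into_tree() and finalize() are hypothetical stand-ins, not nvgpu APIs:

/*
 * User-space sketch of the single-label unwind in clean_up: above.
 * finalize() fails deliberately to exercise the rollback.
 */
#include <stdbool.h>
#include <stdlib.h>

static bool insert_into_tree(void *b) { (void)b; return true; }
static void remove_from_tree(void *b) { (void)b; }
static bool finalize(void)            { return false; }

static long do_map(void)
{
	bool inserted = false;
	void *buf = malloc(64);

	if (!buf)
		goto clean_up;            /* nothing to undo yet */

	if (!insert_into_tree(buf))
		goto clean_up;            /* buf allocated, not inserted */
	inserted = true;

	if (!finalize())
		goto clean_up;            /* insert must now be rolled back */

	return 0x1000;                    /* success: the mapped address */

clean_up:
	if (inserted)
		remove_from_tree(buf);    /* undo in reverse setup order */
	free(buf);                        /* free(NULL) is a safe no-op */
	return 0;                         /* 0 means "map failed", as above */
}

int main(void)
{
	return do_map() == 0 ? 0 : 1;     /* sketch always takes clean_up */
}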
2226 | int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, | 1740 | int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, |
2227 | u64 mapping_gva, | 1741 | u64 mapping_gva, |
2228 | u64 *compbits_win_size, | 1742 | u64 *compbits_win_size, |
@@ -2230,12 +1744,12 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, | |||
2230 | u32 *mapping_ctagline, | 1744 | u32 *mapping_ctagline, |
2231 | u32 *flags) | 1745 | u32 *flags) |
2232 | { | 1746 | { |
2233 | struct mapped_buffer_node *mapped_buffer; | 1747 | struct nvgpu_mapped_buf *mapped_buffer; |
2234 | struct gk20a *g = vm->mm->g; | 1748 | struct gk20a *g = vm->mm->g; |
2235 | 1749 | ||
2236 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 1750 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
2237 | 1751 | ||
2238 | mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva); | 1752 | mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva); |
2239 | 1753 | ||
2240 | if (!mapped_buffer || !mapped_buffer->user_mapped) | 1754 | if (!mapped_buffer || !mapped_buffer->user_mapped) |
2241 | { | 1755 | { |
@@ -2271,7 +1785,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm, | |||
2271 | u64 *mapping_iova, | 1785 | u64 *mapping_iova, |
2272 | u32 flags) | 1786 | u32 flags) |
2273 | { | 1787 | { |
2274 | struct mapped_buffer_node *mapped_buffer; | 1788 | struct nvgpu_mapped_buf *mapped_buffer; |
2275 | struct gk20a *g = gk20a_from_vm(vm); | 1789 | struct gk20a *g = gk20a_from_vm(vm); |
2276 | const bool fixed_mapping = | 1790 | const bool fixed_mapping = |
2277 | (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0; | 1791 | (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0; |
@@ -2292,8 +1806,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm, | |||
2292 | 1806 | ||
2293 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 1807 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
2294 | 1808 | ||
2295 | mapped_buffer = | 1809 | mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, mapping_gva); |
2296 | find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva); | ||
2297 | 1810 | ||
2298 | if (!mapped_buffer || !mapped_buffer->user_mapped) { | 1811 | if (!mapped_buffer || !mapped_buffer->user_mapped) { |
2299 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 1812 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
@@ -2537,12 +2050,12 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) | |||
2537 | 2050 | ||
2538 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | 2051 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) |
2539 | { | 2052 | { |
2540 | struct mapped_buffer_node *buffer; | 2053 | struct nvgpu_mapped_buf *buffer; |
2541 | dma_addr_t addr = 0; | 2054 | dma_addr_t addr = 0; |
2542 | struct gk20a *g = gk20a_from_vm(vm); | 2055 | struct gk20a *g = gk20a_from_vm(vm); |
2543 | 2056 | ||
2544 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 2057 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
2545 | buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr); | 2058 | buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); |
2546 | if (buffer) | 2059 | if (buffer) |
2547 | addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, | 2060 | addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, |
2548 | buffer->flags); | 2061 | buffer->flags); |
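gk20a_mm_gpuva_to_iova_base() shows the locking discipline the renamed __nvgpu_vm_find_mapped_buf() helpers expect: take vm->update_gmmu_lock, look the buffer up by GPU VA, copy out what is needed, release. A pthread-based sketch of that pattern; find_mapped_buf() is a hypothetical stand-in for the rbtree search:

/*
 * Sketch of the lock/lookup/copy-out/release shape used above.
 * The mutex is an analogue of update_gmmu_lock, nothing more.
 */
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>

struct mapped_buf { uint64_t addr, iova; };

struct vm {
	pthread_mutex_t update_lock;
};

static struct mapped_buf *find_mapped_buf(struct vm *vm, uint64_t va)
{
	(void)vm; (void)va;
	return NULL;                         /* real code walks the rbtree */
}

static uint64_t gpuva_to_iova(struct vm *vm, uint64_t gpu_va)
{
	struct mapped_buf *buf;
	uint64_t iova = 0;

	pthread_mutex_lock(&vm->update_lock);
	buf = find_mapped_buf(vm, gpu_va);   /* valid only under the lock */
	if (buf)
		iova = buf->iova;            /* copy out before unlocking */
	pthread_mutex_unlock(&vm->update_lock);

	return iova;
}

int main(void)
{
	struct vm vm;

	pthread_mutex_init(&vm.update_lock, NULL);
	return (int)gpuva_to_iova(&vm, 0x4000);  /* 0: nothing mapped */
}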
@@ -3026,7 +2539,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3026 | } | 2539 | } |
3027 | 2540 | ||
3028 | /* NOTE! mapped_buffers lock must be held */ | 2541 | /* NOTE! mapped_buffers lock must be held */ |
3029 | void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, | 2542 | void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, |
3030 | struct vm_gk20a_mapping_batch *batch) | 2543 | struct vm_gk20a_mapping_batch *batch) |
3031 | { | 2544 | { |
3032 | struct vm_gk20a *vm = mapped_buffer->vm; | 2545 | struct vm_gk20a *vm = mapped_buffer->vm; |
@@ -3057,7 +2570,7 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, | |||
3057 | 2570 | ||
3058 | gk20a_dbg(gpu_dbg_map, | 2571 | gk20a_dbg(gpu_dbg_map, |
3059 | "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d", | 2572 | "gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d", |
3060 | hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), | 2573 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), |
3061 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 2574 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
3062 | vm_aspace_id(vm), | 2575 | vm_aspace_id(vm), |
3063 | mapped_buffer->own_mem_ref); | 2576 | mapped_buffer->own_mem_ref); |
@@ -3066,9 +2579,9 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, | |||
3066 | mapped_buffer->sgt); | 2579 | mapped_buffer->sgt); |
3067 | 2580 | ||
3068 | /* remove from mapped buffer tree and remove list, free */ | 2581 | /* remove from mapped buffer tree and remove list, free */ |
3069 | remove_mapped_buffer(vm, mapped_buffer); | 2582 | nvgpu_remove_mapped_buf(vm, mapped_buffer); |
3070 | if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list)) | 2583 | if (!nvgpu_list_empty(&mapped_buffer->buffer_list)) |
3071 | nvgpu_list_del(&mapped_buffer->va_buffers_list); | 2584 | nvgpu_list_del(&mapped_buffer->buffer_list); |
3072 | 2585 | ||
3073 | /* keep track of mapped buffers */ | 2586 | /* keep track of mapped buffers */ |
3074 | if (mapped_buffer->user_mapped) | 2587 | if (mapped_buffer->user_mapped) |
@@ -3082,22 +2595,6 @@ void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, | |||
3082 | return; | 2595 | return; |
3083 | } | 2596 | } |
3084 | 2597 | ||
3085 | void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) | ||
3086 | { | ||
3087 | struct gk20a *g = vm->mm->g; | ||
3088 | struct mapped_buffer_node *mapped_buffer; | ||
3089 | |||
3090 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
3091 | mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset); | ||
3092 | if (!mapped_buffer) { | ||
3093 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
3094 | nvgpu_err(g, "invalid addr to unmap 0x%llx", offset); | ||
3095 | return; | ||
3096 | } | ||
3097 | |||
3098 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); | ||
3099 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
3100 | } | ||
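The removed nvgpu_vm_unmap() never tears the mapping down directly; it drops a kref, and only the final kref_put() runs gk20a_vm_unmap_locked_kref(), which performs the actual unmap. A user-space analogue of that refcount-with-release-callback pattern:

/*
 * Analogue of the kref pattern behind nvgpu_vm_unmap(): only the
 * final put runs the release callback that does the real teardown.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct mapped_buf {
	atomic_int refcount;
};

static void release(struct mapped_buf *b)
{
	puts("unmapping");               /* real teardown would go here */
	free(b);
}

static void buf_get(struct mapped_buf *b)
{
	atomic_fetch_add(&b->refcount, 1);
}

static void buf_put(struct mapped_buf *b)
{
	/* fetch_sub returns the old value: 1 means this was the last ref */
	if (atomic_fetch_sub(&b->refcount, 1) == 1)
		release(b);
}

int main(void)
{
	struct mapped_buf *b = malloc(sizeof(*b));

	if (!b)
		return 1;
	atomic_init(&b->refcount, 1);    /* like kref_init() */

	buf_get(b);                      /* a second user of the mapping */
	buf_put(b);                      /* one ref left: no teardown    */
	buf_put(b);                      /* final put prints "unmapping" */
	return 0;
}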
3101 | 2598 | ||
3102 | static void gk20a_vm_free_entries(struct vm_gk20a *vm, | 2599 | static void gk20a_vm_free_entries(struct vm_gk20a *vm, |
3103 | struct gk20a_mm_entry *parent, | 2600 | struct gk20a_mm_entry *parent, |
@@ -3659,7 +3156,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
3659 | va_node->vaddr_start = vaddr_start; | 3156 | va_node->vaddr_start = vaddr_start; |
3660 | va_node->size = (u64)args->page_size * (u64)args->pages; | 3157 | va_node->size = (u64)args->page_size * (u64)args->pages; |
3661 | va_node->pgsz_idx = pgsz_idx; | 3158 | va_node->pgsz_idx = pgsz_idx; |
3662 | nvgpu_init_list_node(&va_node->va_buffers_list); | 3159 | nvgpu_init_list_node(&va_node->buffer_list_head); |
3663 | nvgpu_init_list_node(&va_node->reserved_va_list); | 3160 | nvgpu_init_list_node(&va_node->reserved_va_list); |
3664 | 3161 | ||
3665 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 3162 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
@@ -3723,15 +3220,15 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
3723 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 3220 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
3724 | va_node = addr_to_reservation(vm, args->offset); | 3221 | va_node = addr_to_reservation(vm, args->offset); |
3725 | if (va_node) { | 3222 | if (va_node) { |
3726 | struct mapped_buffer_node *buffer, *n; | 3223 | struct nvgpu_mapped_buf *buffer, *n; |
3727 | 3224 | ||
3728 | /* Decrement the ref count on all buffers in this va_node. This | 3225 | /* Decrement the ref count on all buffers in this va_node. This |
3729 | * allows userspace to let the kernel free mappings that are | 3226 | * allows userspace to let the kernel free mappings that are |
3730 | * only used by this va_node. */ | 3227 | * only used by this va_node. */ |
3731 | nvgpu_list_for_each_entry_safe(buffer, n, | 3228 | nvgpu_list_for_each_entry_safe(buffer, n, |
3732 | &va_node->va_buffers_list, | 3229 | &va_node->buffer_list_head, |
3733 | mapped_buffer_node, va_buffers_list) { | 3230 | nvgpu_mapped_buf, buffer_list) { |
3734 | nvgpu_list_del(&buffer->va_buffers_list); | 3231 | nvgpu_list_del(&buffer->buffer_list); |
3735 | kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); | 3232 | kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); |
3736 | } | 3233 | } |
3737 | 3234 | ||
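The loop above needs the _safe iterator because kref_put() may free the node the cursor is standing on; the iterator therefore caches the successor before the body runs. The same idea in miniature:

/*
 * Why the _safe iterator matters: the successor is fetched before
 * the loop body runs, so the body may free the current node, just
 * as kref_put() may free 'buffer' in the loop above.
 */
#include <stdlib.h>

struct node { struct node *next; };

static void free_all(struct node *pos)
{
	struct node *n;

	while (pos) {
		n = pos->next;           /* grab next before freeing pos */
		free(pos);
		pos = n;
	}
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct node *x = malloc(sizeof(*x));

		if (!x)
			break;           /* sketch: tolerate OOM */
		x->next = head;
		head = x;
	}
	free_all(head);
	return 0;
}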
@@ -3887,7 +3384,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
3887 | } | 3384 | } |
3888 | 3385 | ||
3889 | ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, | 3386 | ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, |
3890 | flags, kind, NULL, true, | 3387 | flags, kind, true, |
3891 | gk20a_mem_flag_none, | 3388 | gk20a_mem_flag_none, |
3892 | buffer_offset, | 3389 | buffer_offset, |
3893 | mapping_size, | 3390 | mapping_size, |
@@ -4296,18 +3793,17 @@ hw_was_off: | |||
4296 | gk20a_idle_nosuspend(g->dev); | 3793 | gk20a_idle_nosuspend(g->dev); |
4297 | } | 3794 | } |
4298 | 3795 | ||
4299 | int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | 3796 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, |
4300 | struct dma_buf **dmabuf, | 3797 | struct dma_buf **dmabuf, |
4301 | u64 *offset) | 3798 | u64 *offset) |
4302 | { | 3799 | { |
4303 | struct mapped_buffer_node *mapped_buffer; | 3800 | struct nvgpu_mapped_buf *mapped_buffer; |
4304 | 3801 | ||
4305 | gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); | 3802 | gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); |
4306 | 3803 | ||
4307 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 3804 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
4308 | 3805 | ||
4309 | mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers, | 3806 | mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); |
4310 | gpu_va); | ||
4311 | if (!mapped_buffer) { | 3807 | if (!mapped_buffer) { |
4312 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 3808 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
4313 | return -EINVAL; | 3809 | return -EINVAL; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 331843cc..357962c7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -140,52 +140,9 @@ struct priv_cmd_entry { | |||
140 | u32 size; /* in words */ | 140 | u32 size; /* in words */ |
141 | }; | 141 | }; |
142 | 142 | ||
143 | struct mapped_buffer_node { | ||
144 | struct vm_gk20a *vm; | ||
145 | struct nvgpu_rbtree_node node; | ||
146 | struct nvgpu_list_node va_buffers_list; | ||
147 | struct vm_reserved_va_node *va_node; | ||
148 | u64 addr; | ||
149 | u64 size; | ||
150 | struct dma_buf *dmabuf; | ||
151 | struct sg_table *sgt; | ||
152 | struct kref ref; | ||
153 | u32 user_mapped; | ||
154 | bool own_mem_ref; | ||
155 | u32 pgsz_idx; | ||
156 | u32 ctag_offset; | ||
157 | u32 ctag_lines; | ||
158 | u32 ctag_allocated_lines; | ||
159 | |||
160 | /* For comptag mapping, these are the mapping window parameters */ | ||
161 | bool ctags_mappable; | ||
162 | u64 ctag_map_win_addr; /* non-zero if mapped */ | ||
163 | u64 ctag_map_win_size; /* non-zero if ctags_mappable */ | ||
164 | u32 ctag_map_win_ctagline; /* ctagline at win start, set if | ||
165 | * ctags_mappable */ | ||
166 | |||
167 | u32 flags; | ||
168 | u32 kind; | ||
169 | bool va_allocated; | ||
170 | }; | ||
171 | |||
172 | static inline struct mapped_buffer_node * | ||
173 | mapped_buffer_node_from_va_buffers_list(struct nvgpu_list_node *node) | ||
174 | { | ||
175 | return (struct mapped_buffer_node *) | ||
176 | ((uintptr_t)node - offsetof(struct mapped_buffer_node, va_buffers_list)); | ||
177 | }; | ||
178 | |||
179 | static inline struct mapped_buffer_node * | ||
180 | mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node) | ||
181 | { | ||
182 | return (struct mapped_buffer_node *) | ||
183 | ((uintptr_t)node - offsetof(struct mapped_buffer_node, node)); | ||
184 | }; | ||
185 | |||
186 | struct vm_reserved_va_node { | 143 | struct vm_reserved_va_node { |
187 | struct nvgpu_list_node reserved_va_list; | 144 | struct nvgpu_list_node reserved_va_list; |
188 | struct nvgpu_list_node va_buffers_list; | 145 | struct nvgpu_list_node buffer_list_head; |
189 | u32 pgsz_idx; | 146 | u32 pgsz_idx; |
190 | u64 vaddr_start; | 147 | u64 vaddr_start; |
191 | u64 size; | 148 | u64 size; |
@@ -431,11 +388,6 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) | |||
431 | return 0; | 388 | return 0; |
432 | } | 389 | } |
433 | 390 | ||
434 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
435 | u32 sysmem_mask, u32 vidmem_mask); | ||
436 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
437 | u32 sysmem_mask, u32 vidmem_mask); | ||
438 | |||
439 | void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry, | 391 | void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry, |
440 | size_t w, size_t data); | 392 | size_t w, size_t data); |
441 | u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry); | 393 | u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry); |
@@ -532,8 +484,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | |||
532 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, | 484 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, |
533 | struct vm_gk20a *vm); | 485 | struct vm_gk20a *vm); |
534 | 486 | ||
535 | void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); | ||
536 | |||
537 | int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); | 487 | int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); |
538 | 488 | ||
539 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; | 489 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index eab51175..75dfcc86 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g) | |||
401 | struct mm_gk20a *mm = &g->mm; | 401 | struct mm_gk20a *mm = &g->mm; |
402 | 402 | ||
403 | gp10b_replayable_pagefault_buffer_deinit(g); | 403 | gp10b_replayable_pagefault_buffer_deinit(g); |
404 | gk20a_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); | 404 | nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); |
405 | } | 405 | } |
406 | 406 | ||
407 | 407 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index 1fb772d5..e1ceffd4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h | |||
@@ -26,7 +26,10 @@ | |||
26 | #include <nvgpu/allocator.h> | 26 | #include <nvgpu/allocator.h> |
27 | 27 | ||
28 | struct vm_gk20a; | 28 | struct vm_gk20a; |
29 | struct mapped_buffer_node; | 29 | struct vm_reserved_va_node; |
30 | struct buffer_attrs; | ||
31 | struct gk20a_comptag_allocator; | ||
32 | |||
30 | 33 | ||
31 | /** | 34 | /** |
32 | * This header contains the OS agnostic APIs for dealing with VMs. Most of the | 35 | * This header contains the OS agnostic APIs for dealing with VMs. Most of the |
@@ -44,6 +47,50 @@ struct vm_gk20a_mapping_batch { | |||
44 | bool need_tlb_invalidate; | 47 | bool need_tlb_invalidate; |
45 | }; | 48 | }; |
46 | 49 | ||
50 | struct nvgpu_mapped_buf { | ||
51 | struct vm_gk20a *vm; | ||
52 | struct nvgpu_rbtree_node node; | ||
53 | struct nvgpu_list_node buffer_list; | ||
54 | struct vm_reserved_va_node *va_node; | ||
55 | u64 addr; | ||
56 | u64 size; | ||
57 | struct dma_buf *dmabuf; | ||
58 | struct sg_table *sgt; | ||
59 | struct kref ref; | ||
60 | u32 user_mapped; | ||
61 | bool own_mem_ref; | ||
62 | u32 pgsz_idx; | ||
63 | u32 ctag_offset; | ||
64 | u32 ctag_lines; | ||
65 | u32 ctag_allocated_lines; | ||
66 | |||
67 | /* For comptag mapping, these are the mapping window parameters */ | ||
68 | bool ctags_mappable; | ||
69 | u64 ctag_map_win_addr; /* non-zero if mapped */ | ||
70 | u64 ctag_map_win_size; /* non-zero if ctags_mappable */ | ||
71 | u32 ctag_map_win_ctagline; /* ctagline at win start, set if | ||
72 | * ctags_mappable */ | ||
73 | |||
74 | u32 flags; | ||
75 | u32 kind; | ||
76 | bool va_allocated; | ||
77 | }; | ||
78 | |||
79 | static inline struct nvgpu_mapped_buf * | ||
80 | nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node) | ||
81 | { | ||
82 | return (struct nvgpu_mapped_buf *) | ||
83 | ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, | ||
84 | buffer_list)); | ||
85 | } | ||
86 | |||
87 | static inline struct nvgpu_mapped_buf * | ||
88 | mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node) | ||
89 | { | ||
90 | return (struct nvgpu_mapped_buf *) | ||
91 | ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node)); | ||
92 | } | ||
93 | |||
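The two inline helpers above are the intrusive-node idiom: the rbtree/list node is embedded inside struct nvgpu_mapped_buf, and the containing object is recovered by subtracting the member offset, i.e. an open-coded container_of(). A standalone demonstration of why the arithmetic works:

/*
 * The open-coded container_of() used by the helpers above:
 * subtracting the member offset from the embedded node's address
 * recovers the containing object.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct list_node { struct list_node *next; };

struct mapped_buf {
	uint64_t addr;
	struct list_node buffer_list;    /* node embedded in the object */
};

#define container_of(ptr, type, member) \
	((type *)((uintptr_t)(ptr) - offsetof(type, member)))

int main(void)
{
	struct mapped_buf buf = { .addr = 0x4000 };
	struct list_node *node = &buf.buffer_list;
	struct mapped_buf *back;

	/* recover the containing struct from the embedded node */
	back = container_of(node, struct mapped_buf, buffer_list);
	assert(back == &buf);
	assert(back->addr == 0x4000);
	return 0;
}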
47 | struct vm_gk20a { | 94 | struct vm_gk20a { |
48 | struct mm_gk20a *mm; | 95 | struct mm_gk20a *mm; |
49 | struct gk20a_as_share *as_share; /* as_share this represents */ | 96 | struct gk20a_as_share *as_share; /* as_share this represents */ |
@@ -102,6 +149,8 @@ struct vm_gk20a { | |||
102 | void nvgpu_vm_get(struct vm_gk20a *vm); | 149 | void nvgpu_vm_get(struct vm_gk20a *vm); |
103 | void nvgpu_vm_put(struct vm_gk20a *vm); | 150 | void nvgpu_vm_put(struct vm_gk20a *vm); |
104 | 151 | ||
152 | int vm_aspace_id(struct vm_gk20a *vm); | ||
153 | |||
105 | /* batching eliminates redundant cache flushes and invalidates */ | 154 | /* batching eliminates redundant cache flushes and invalidates */ |
106 | void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); | 155 | void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); |
107 | void nvgpu_vm_mapping_batch_finish( | 156 | void nvgpu_vm_mapping_batch_finish( |
@@ -112,24 +161,45 @@ void nvgpu_vm_mapping_batch_finish_locked( | |||
112 | 161 | ||
113 | /* get reference to all currently mapped buffers */ | 162 | /* get reference to all currently mapped buffers */ |
114 | int nvgpu_vm_get_buffers(struct vm_gk20a *vm, | 163 | int nvgpu_vm_get_buffers(struct vm_gk20a *vm, |
115 | struct mapped_buffer_node ***mapped_buffers, | 164 | struct nvgpu_mapped_buf ***mapped_buffers, |
116 | int *num_buffers); | 165 | int *num_buffers); |
117 | 166 | ||
118 | /* put references on the given buffers */ | 167 | /* put references on the given buffers */ |
119 | void nvgpu_vm_put_buffers(struct vm_gk20a *vm, | 168 | void nvgpu_vm_put_buffers(struct vm_gk20a *vm, |
120 | struct mapped_buffer_node **mapped_buffers, | 169 | struct nvgpu_mapped_buf **mapped_buffers, |
121 | int num_buffers); | 170 | int num_buffers); |
122 | 171 | ||
123 | /* Note: batch may be NULL if unmap op is not part of a batch */ | 172 | /* Note: batch may be NULL if unmap op is not part of a batch */ |
124 | int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | 173 | int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, |
125 | struct vm_gk20a_mapping_batch *batch); | 174 | struct vm_gk20a_mapping_batch *batch); |
126 | 175 | ||
127 | void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, | 176 | void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, |
128 | struct vm_gk20a_mapping_batch *batch); | 177 | struct vm_gk20a_mapping_batch *batch); |
129 | 178 | ||
179 | /* | ||
180 | * These all require the VM update lock to be held. | ||
181 | */ | ||
182 | struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf( | ||
183 | struct vm_gk20a *vm, u64 addr); | ||
184 | struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range( | ||
185 | struct vm_gk20a *vm, u64 addr); | ||
186 | struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than( | ||
187 | struct vm_gk20a *vm, u64 addr); | ||
188 | |||
189 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, | ||
190 | struct dma_buf **dmabuf, | ||
191 | u64 *offset); | ||
192 | |||
193 | int nvgpu_insert_mapped_buf(struct vm_gk20a *vm, | ||
194 | struct nvgpu_mapped_buf *mapped_buffer); | ||
195 | void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, | ||
196 | struct nvgpu_mapped_buf *mapped_buffer); | ||
197 | |||
130 | void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); | 198 | void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); |
131 | void nvgpu_vm_remove_support(struct vm_gk20a *vm); | 199 | void nvgpu_vm_remove_support(struct vm_gk20a *vm); |
132 | 200 | ||
201 | void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); | ||
202 | |||
133 | int nvgpu_init_vm(struct mm_gk20a *mm, | 203 | int nvgpu_init_vm(struct mm_gk20a *mm, |
134 | struct vm_gk20a *vm, | 204 | struct vm_gk20a *vm, |
135 | u32 big_page_size, | 205 | u32 big_page_size, |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index cfa9e428..f4004f42 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -202,7 +202,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
202 | static void vgpu_vm_remove_support(struct vm_gk20a *vm) | 202 | static void vgpu_vm_remove_support(struct vm_gk20a *vm) |
203 | { | 203 | { |
204 | struct gk20a *g = vm->mm->g; | 204 | struct gk20a *g = vm->mm->g; |
205 | struct mapped_buffer_node *mapped_buffer; | 205 | struct nvgpu_mapped_buf *mapped_buffer; |
206 | struct vm_reserved_va_node *va_node, *va_node_tmp; | 206 | struct vm_reserved_va_node *va_node, *va_node_tmp; |
207 | struct tegra_vgpu_cmd_msg msg; | 207 | struct tegra_vgpu_cmd_msg msg; |
208 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | 208 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; |