Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/vm.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c  421
1 file changed, 421 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
new file mode 100644
index 00000000..8b9d6f96
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -0,0 +1,421 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-buf.h>
#include <linux/scatterlist.h>

#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/page_allocator.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#include "vm_priv.h"

static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
        struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
{
        struct nvgpu_rbtree_node *node = NULL;
        struct nvgpu_rbtree_node *root = vm->mapped_buffers;

        nvgpu_rbtree_enum_start(0, &node, root);

        while (node) {
                struct nvgpu_mapped_buf *mapped_buffer =
                        mapped_buffer_from_rbtree_node(node);

                if (mapped_buffer->dmabuf == dmabuf &&
                    kind == mapped_buffer->kind)
                        return mapped_buffer;

                nvgpu_rbtree_enum_next(&node, node);
        }

        return NULL;
}

/*
 * Determine alignment for a passed buffer. Necessary since the buffer may
 * appear big enough to map with large pages, but the SGL may have chunks
 * that are not aligned on a 64/128kB large page boundary.
 */
static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
                                      enum nvgpu_aperture aperture)
{
        u64 align = 0, chunk_align = 0;
        u64 buf_addr;

        if (aperture == APERTURE_VIDMEM) {
                struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
                struct page_alloc_chunk *chunk = NULL;

                nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
                                          page_alloc_chunk, list_entry) {
                        chunk_align = 1ULL << __ffs(chunk->base |
                                                    chunk->length);

                        if (align)
                                align = min(align, chunk_align);
                        else
                                align = chunk_align;
                }

                return align;
        }

        buf_addr = (u64)sg_dma_address(sgl);

        if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
                while (sgl) {
                        buf_addr = (u64)sg_phys(sgl);
                        chunk_align = 1ULL << __ffs(buf_addr |
                                                    (u64)sgl->length);

                        if (align)
                                align = min(align, chunk_align);
                        else
                                align = chunk_align;
                        sgl = sg_next(sgl);
                }

                return align;
        }

        align = 1ULL << __ffs(buf_addr);

        return align;
}
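
/*
 * Worked example for the alignment computation above (illustrative values,
 * not taken from real hardware): a vidmem allocation with two chunks,
 *
 *   chunk 0: base = 0x0030_0000, length = 0x0010_0000 -> chunk_align = 0x10_0000
 *   chunk 1: base = 0x0042_0000, length = 0x0002_0000 -> chunk_align = 0x02_0000
 *
 * gives align = min(0x10_0000, 0x02_0000) = 0x02_0000, i.e. the buffer can
 * still be mapped with 128kB large pages even though chunk 0 alone would
 * have allowed a larger alignment.
 */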

/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
 * mapping address.
 */
static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                                   struct dma_buf *dmabuf,
                                   u64 offset_align,
                                   u32 flags,
                                   int kind,
                                   bool user_mapped,
                                   int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvgpu_mapped_buf *mapped_buffer = NULL;

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
                mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
                if (!mapped_buffer)
                        return 0;

                if (mapped_buffer->dmabuf != dmabuf ||
                    mapped_buffer->kind != (u32)kind)
                        return 0;
        } else {
                mapped_buffer =
                        __nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
                if (!mapped_buffer)
                        return 0;
        }

        if (mapped_buffer->flags != flags)
                return 0;

        /* mark the buffer as used */
        if (user_mapped) {
                if (mapped_buffer->user_mapped == 0)
                        vm->num_user_mapped_buffers++;
                mapped_buffer->user_mapped++;

                /*
                 * If the mapping comes from user space, we own the dma_buf
                 * handle ref. Since we are reusing an existing mapping here,
                 * we need to give back one of those refs so that we do not
                 * leak.
                 */
                if (mapped_buffer->own_mem_ref)
                        dma_buf_put(mapped_buffer->dmabuf);
                else
                        mapped_buffer->own_mem_ref = true;
        }
        kref_get(&mapped_buffer->ref);

        nvgpu_log(g, gpu_dbg_map,
                  "gv: 0x%04x_%08x + 0x%-7zu "
                  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
                  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
                  "flags=0x%x apt=%s (reused)",
                  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
                  dmabuf->size,
                  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
                  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
                  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
                  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
                  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
                  mapped_buffer->flags,
                  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));

        return mapped_buffer->addr;
}
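
/*
 * Note on the reuse path above: when the same dma_buf/kind pair is mapped
 * again with identical flags, the caller gets back the original GPU VA and
 * only the mapped_buffer kref grows. For user mappings, the dma_buf
 * reference that arrived with the new request is dropped right away
 * (dma_buf_put() above), so the tracking structure holds at most one
 * dma_buf reference no matter how many times user space maps the buffer.
 */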

u64 nvgpu_vm_map(struct vm_gk20a *vm,
                 struct dma_buf *dmabuf,
                 u64 offset_align,
                 u32 flags,
                 int kind,
                 bool user_mapped,
                 int rw_flag,
                 u64 buffer_offset,
                 u64 mapping_size,
                 struct vm_gk20a_mapping_batch *batch)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
        struct nvgpu_mapped_buf *mapped_buffer = NULL;
        bool inserted = false, va_allocated = false;
        u64 map_offset = 0;
        int err = 0;
        struct buffer_attrs bfr = {NULL};
        struct gk20a_comptags comptags;
        bool clear_ctags = false;
        struct scatterlist *sgl;
        u64 ctag_map_win_size = 0;
        u32 ctag_map_win_ctagline = 0;
        struct vm_reserved_va_node *va_node = NULL;
        u32 ctag_offset;
        enum nvgpu_aperture aperture;

        if (user_mapped && vm->userspace_managed &&
            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g, "non-fixed-offset mapping not available on "
                          "userspace managed address spaces");
                return -EFAULT;
        }

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);

        /* check if this buffer is already mapped */
        if (!vm->userspace_managed) {
                map_offset = __nvgpu_vm_find_mapping(
                        vm, dmabuf, offset_align,
                        flags, kind,
                        user_mapped, rw_flag);
                if (map_offset) {
                        nvgpu_mutex_release(&vm->update_gmmu_lock);
                        return map_offset;
                }
        }

        /* pin buffer to get phys/iovmm addr */
        bfr.sgt = gk20a_mm_pin(g->dev, dmabuf);
        if (IS_ERR(bfr.sgt)) {
                /* Falling back to physical is actually possible
                 * here in many cases if we use 4K phys pages in the
                 * gmmu. However we have some regions which require
                 * contig regions to work properly (either phys-contig
                 * or contig through smmu io_vaspace). Until we can
                 * track the difference between those two cases we have
                 * to fail the mapping when we run out of SMMU space.
                 */
                nvgpu_warn(g, "oom allocating tracking buffer");
                goto clean_up;
        }

        bfr.kind_v = kind;
        bfr.size = dmabuf->size;
        sgl = bfr.sgt->sgl;

        aperture = gk20a_dmabuf_aperture(g, dmabuf);
        if (aperture == APERTURE_INVALID) {
                err = -EINVAL;
                goto clean_up;
        }

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
                map_offset = offset_align;

        bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
        bfr.pgsz_idx = __get_pte_size(vm, map_offset,
                                      min_t(u64, bfr.size, bfr.align));
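
        /*
         * Note on the page size choice above: __get_pte_size() (defined
         * outside this file) is handed the smaller of the buffer size and
         * its SGL alignment, so a large buffer whose chunks are only
         * 4kB-aligned still ends up mapped with small pages.
         */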
        mapping_size = mapping_size ? mapping_size : bfr.size;

        /* Check if we should use a fixed offset for mapping this buffer */
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
                err = validate_fixed_buffer(vm, &bfr,
                                            offset_align, mapping_size,
                                            &va_node);
                if (err)
                        goto clean_up;

                map_offset = offset_align;
                va_allocated = false;
        } else
                va_allocated = true;

        err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
        if (unlikely(err)) {
                nvgpu_err(g, "failure setting up kind and compression");
                goto clean_up;
        }

        /* bar1 and pmu vm don't need ctag */
        if (!vm->enable_ctag)
                bfr.ctag_lines = 0;

        gk20a_get_comptags(g->dev, dmabuf, &comptags);

        /* ensure alignment to compression page size if compression enabled */
        if (bfr.ctag_offset)
                mapping_size = ALIGN(mapping_size,
                                     g->ops.fb.compression_page_size(g));

        if (bfr.ctag_lines && !comptags.lines) {
                const bool user_mappable =
                        !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);

                /* allocate compression resources if needed */
                err = gk20a_alloc_comptags(g, g->dev, dmabuf, ctag_allocator,
                                           bfr.ctag_lines, user_mappable,
                                           &ctag_map_win_size,
                                           &ctag_map_win_ctagline);
                if (err) {
                        /* ok to fall back here if we ran out */
                        /* TBD: we can partially alloc ctags as well... */
                        bfr.kind_v = bfr.uc_kind_v;
                } else {
                        gk20a_get_comptags(g->dev, dmabuf, &comptags);

                        if (g->ops.ltc.cbc_ctrl)
                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
                                                    comptags.offset,
                                                    comptags.offset +
                                                    comptags.allocated_lines - 1);
                        else
                                clear_ctags = true;
                }
        }

        /* store the comptag info */
        bfr.ctag_offset = comptags.offset;
        bfr.ctag_lines = comptags.lines;
        bfr.ctag_allocated_lines = comptags.allocated_lines;
        bfr.ctag_user_mappable = comptags.user_mappable;

        /*
         * Calculate comptag index for this mapping. Differs in
         * case of partial mapping.
         */
        ctag_offset = comptags.offset;
        if (ctag_offset)
                ctag_offset += buffer_offset >>
                               ilog2(g->ops.fb.compression_page_size(g));
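
        /*
         * Illustrative numbers only: if the compression page size were 128kB
         * (ilog2 == 17) and the caller mapped a window starting at
         * buffer_offset = 0x80000 into the buffer, the window would begin
         * four compression pages in, so 4 is added to the buffer's base
         * comptag line.
         */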

        /* update gmmu ptes */
        map_offset = g->ops.mm.gmmu_map(vm, map_offset,
                                        bfr.sgt,
                                        buffer_offset, /* sg offset */
                                        mapping_size,
                                        bfr.pgsz_idx,
                                        bfr.kind_v,
                                        ctag_offset,
                                        flags, rw_flag,
                                        clear_ctags,
                                        false,
                                        false,
                                        batch,
                                        aperture);
        if (!map_offset)
                goto clean_up;

        mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
        if (!mapped_buffer) {
                nvgpu_warn(g, "oom allocating tracking buffer");
                goto clean_up;
        }
        mapped_buffer->dmabuf = dmabuf;
        mapped_buffer->sgt = bfr.sgt;
        mapped_buffer->addr = map_offset;
        mapped_buffer->size = mapping_size;
        mapped_buffer->pgsz_idx = bfr.pgsz_idx;
        mapped_buffer->ctag_offset = bfr.ctag_offset;
        mapped_buffer->ctag_lines = bfr.ctag_lines;
        mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
        mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
        mapped_buffer->ctag_map_win_size = ctag_map_win_size;
        mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
        mapped_buffer->vm = vm;
        mapped_buffer->flags = flags;
        mapped_buffer->kind = kind;
        mapped_buffer->va_allocated = va_allocated;
        mapped_buffer->user_mapped = user_mapped ? 1 : 0;
        mapped_buffer->own_mem_ref = user_mapped;
        nvgpu_init_list_node(&mapped_buffer->buffer_list);
        kref_init(&mapped_buffer->ref);

        err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
        if (err) {
                nvgpu_err(g, "failed to insert into mapped buffer tree");
                goto clean_up;
        }
        inserted = true;
        if (user_mapped)
                vm->num_user_mapped_buffers++;

        if (va_node) {
                nvgpu_list_add_tail(&mapped_buffer->buffer_list,
                                    &va_node->buffer_list_head);
                mapped_buffer->va_node = va_node;
        }

        nvgpu_mutex_release(&vm->update_gmmu_lock);

        return map_offset;

clean_up:
        if (inserted) {
                nvgpu_remove_mapped_buf(vm, mapped_buffer);
                if (user_mapped)
                        vm->num_user_mapped_buffers--;
        }
        nvgpu_kfree(g, mapped_buffer);
        if (va_allocated)
                gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
        if (!IS_ERR(bfr.sgt))
                gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);

        nvgpu_mutex_release(&vm->update_gmmu_lock);
        nvgpu_log_info(g, "err=%d\n", err);
        return 0;
}
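
/*
 * A minimal sketch of how a caller might drive nvgpu_vm_map(). The ioctl
 * plumbing lives outside this file; the helper name, the choice of kind 0,
 * the rw_flag value and the error codes below are illustrative only, not
 * the driver's actual entry point:
 *
 *      static int example_map_whole_buffer(struct vm_gk20a *vm, int fd)
 *      {
 *              struct dma_buf *dmabuf = dma_buf_get(fd);
 *              u64 gpu_va;
 *
 *              if (IS_ERR(dmabuf))
 *                      return PTR_ERR(dmabuf);
 *
 *              // Non-fixed mapping of the whole buffer: no FIXED_OFFSET
 *              // flag, offset_align/buffer_offset/mapping_size all 0,
 *              // user_mapped so the mapping takes over the dma_buf ref
 *              // on success.
 *              gpu_va = nvgpu_vm_map(vm, dmabuf, 0, 0, 0, true,
 *                                    gk20a_mem_flag_none, 0, 0, NULL);
 *              if (!gpu_va) {
 *                      dma_buf_put(dmabuf);
 *                      return -ENOMEM;
 *              }
 *
 *              return 0;
 *      }
 */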

void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
{
        struct gk20a *g = vm->mm->g;
        struct nvgpu_mapped_buf *mapped_buffer;

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
        if (!mapped_buffer) {
                nvgpu_mutex_release(&vm->update_gmmu_lock);
                nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
                return;
        }

        kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
        nvgpu_mutex_release(&vm->update_gmmu_lock);
}
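
/*
 * Note on unmap semantics: nvgpu_vm_unmap() drops one reference per call and
 * the real teardown (gk20a_vm_unmap_locked_kref) only runs once the kref
 * reaches zero, so a buffer whose mapping was handed out several times via
 * __nvgpu_vm_find_mapping() must be unmapped the same number of times before
 * its GMMU entries actually go away.
 */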