Diffstat (limited to 'include/os/linux/vm.c')
-rw-r--r--   include/os/linux/vm.c   356
1 file changed, 356 insertions, 0 deletions
diff --git a/include/os/linux/vm.c b/include/os/linux/vm.c
new file mode 100644
index 0000000..dc807ab
--- /dev/null
+++ b/include/os/linux/vm.c
@@ -0,0 +1,356 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>

#include <nvgpu/linux/vm.h>
#include <nvgpu/linux/nvgpu_mem.h>

#include "gk20a/mm_gk20a.h"

#include "platform_gk20a.h"
#include "os_linux.h"
#include "dmabuf.h"
#include "dmabuf_vidmem.h"

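/*
 * Translate the NVGPU_AS_MAP_BUFFER_FLAGS_* values coming in from the Linux
 * UAPI into the core NVGPU_VM_MAP_* flags. The deprecated MAPPABLE_COMPBITS
 * flag is ignored with a warning.
 */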
static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags)
{
        u32 core_flags = 0;

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
                core_flags |= NVGPU_VM_MAP_FIXED_OFFSET;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE)
                core_flags |= NVGPU_VM_MAP_CACHEABLE;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT)
                core_flags |= NVGPU_VM_MAP_IO_COHERENT;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE)
                core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC)
                core_flags |= NVGPU_VM_MAP_L3_ALLOC;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)
                core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC)
                core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC;

        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS)
                nvgpu_warn(g, "Ignoring deprecated flag: "
                           "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS");

        return core_flags;
}

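/*
 * Walk the VM's mapped buffer rbtree and return an existing mapping of the
 * given dma_buf with a matching kind value, if any.
 */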
static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
        struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
{
        struct nvgpu_rbtree_node *node = NULL;
        struct nvgpu_rbtree_node *root = vm->mapped_buffers;

        nvgpu_rbtree_enum_start(0, &node, root);

        while (node) {
                struct nvgpu_mapped_buf *mapped_buffer =
                        mapped_buffer_from_rbtree_node(node);

                if (mapped_buffer->os_priv.dmabuf == dmabuf &&
                    mapped_buffer->kind == kind)
                        return mapped_buffer;

                nvgpu_rbtree_enum_next(&node, node);
        }

        return NULL;
}

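/*
 * Given a GPU virtual address, return the backing dma_buf and the offset of
 * that address within the mapping. Returns -EINVAL if no mapping covers
 * gpu_va. Takes and releases vm->update_gmmu_lock.
 */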
int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
                      struct dma_buf **dmabuf,
                      u64 *offset)
{
        struct nvgpu_mapped_buf *mapped_buffer;
        struct gk20a *g = gk20a_from_vm(vm);

        nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va);

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);

        mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
        if (!mapped_buffer) {
                nvgpu_mutex_release(&vm->update_gmmu_lock);
                return -EINVAL;
        }

        *dmabuf = mapped_buffer->os_priv.dmabuf;
        *offset = gpu_va - mapped_buffer->addr;

        nvgpu_mutex_release(&vm->update_gmmu_lock);

        return 0;
}

u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
{
        return os_buf->dmabuf->size;
}

/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
 * mapping address.
 */
struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                                               struct nvgpu_os_buffer *os_buf,
                                               u64 map_addr,
                                               u32 flags,
                                               int kind)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvgpu_mapped_buf *mapped_buffer = NULL;

        if (flags & NVGPU_VM_MAP_FIXED_OFFSET) {
                mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
                if (!mapped_buffer)
                        return NULL;

                if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
                    mapped_buffer->kind != (u32)kind)
                        return NULL;
        } else {
                mapped_buffer =
                        __nvgpu_vm_find_mapped_buf_reverse(vm,
                                                           os_buf->dmabuf,
                                                           kind);
                if (!mapped_buffer)
                        return NULL;
        }

        if (mapped_buffer->flags != flags)
                return NULL;

        /*
         * If we find the mapping here then that means we have mapped it
         * already and the prior pin and get must be undone.
         */
        gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
                       mapped_buffer->os_priv.sgt);
        dma_buf_put(os_buf->dmabuf);

        nvgpu_log(g, gpu_dbg_map,
                  "gv: 0x%04x_%08x + 0x%-7zu "
                  "[dma: 0x%010llx, pa: 0x%010llx] "
                  "pgsz=%-3dKb as=%-2d "
                  "flags=0x%x apt=%s (reused)",
                  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
                  os_buf->dmabuf->size,
                  (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
                  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
                  mapped_buffer->flags,
                  nvgpu_aperture_str(g,
                                     gk20a_dmabuf_aperture(g, os_buf->dmabuf)));

        return mapped_buffer;
}

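/*
 * Pin the dma_buf, wrap the resulting sg_table in an nvgpu_sgt, and hand the
 * buffer to the core nvgpu_vm_map(). On success the Linux-specific handles
 * (dmabuf, attachment, sgt) are stored in the mapped buffer's os_priv; on
 * failure the buffer is unpinned again.
 */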
int nvgpu_vm_map_linux(struct vm_gk20a *vm,
                       struct dma_buf *dmabuf,
                       u64 map_addr,
                       u32 flags,
                       u32 page_size,
                       s16 compr_kind,
                       s16 incompr_kind,
                       int rw_flag,
                       u64 buffer_offset,
                       u64 mapping_size,
                       struct vm_gk20a_mapping_batch *batch,
                       u64 *gpu_va)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct device *dev = dev_from_gk20a(g);
        struct nvgpu_os_buffer os_buf;
        struct sg_table *sgt;
        struct nvgpu_sgt *nvgpu_sgt = NULL;
        struct nvgpu_mapped_buf *mapped_buffer = NULL;
        struct dma_buf_attachment *attachment;
        int err = 0;

        sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
        if (IS_ERR(sgt)) {
                nvgpu_warn(g, "Failed to pin dma_buf!");
                return PTR_ERR(sgt);
        }
        os_buf.dmabuf = dmabuf;
        os_buf.attachment = attachment;
        os_buf.dev = dev;

        if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
                err = -EINVAL;
                goto clean_up;
        }

        nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
        if (!nvgpu_sgt) {
                err = -ENOMEM;
                goto clean_up;
        }

        mapped_buffer = nvgpu_vm_map(vm,
                                     &os_buf,
                                     nvgpu_sgt,
                                     map_addr,
                                     mapping_size,
                                     buffer_offset,
                                     rw_flag,
                                     flags,
                                     compr_kind,
                                     incompr_kind,
                                     batch,
                                     gk20a_dmabuf_aperture(g, dmabuf));

        nvgpu_sgt_free(g, nvgpu_sgt);

        if (IS_ERR(mapped_buffer)) {
                err = PTR_ERR(mapped_buffer);
                goto clean_up;
        }

        mapped_buffer->os_priv.dmabuf = dmabuf;
        mapped_buffer->os_priv.attachment = attachment;
        mapped_buffer->os_priv.sgt = sgt;

        *gpu_va = mapped_buffer->addr;
        return 0;

clean_up:
        gk20a_mm_unpin(dev, dmabuf, attachment, sgt);

        return err;
}

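/*
 * Map a buffer given a dma_buf fd: take a reference on the dma_buf, validate
 * the userspace offset/size arguments against the buffer size, then map it
 * through nvgpu_vm_map_linux(). The dma_buf reference is dropped on error and
 * otherwise held until the buffer is unmapped.
 */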
int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                        int dmabuf_fd,
                        u64 *map_addr,
                        u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
                        u32 page_size,
                        s16 compr_kind,
                        s16 incompr_kind,
                        u64 buffer_offset,
                        u64 mapping_size,
                        struct vm_gk20a_mapping_batch *batch)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct dma_buf *dmabuf;
        u64 ret_va;
        int err = 0;

        /* get ref to the mem handle (released on unmap_locked) */
        dmabuf = dma_buf_get(dmabuf_fd);
        if (IS_ERR(dmabuf)) {
                nvgpu_warn(g, "%s: fd %d is not a dmabuf",
                           __func__, dmabuf_fd);
                return PTR_ERR(dmabuf);
        }

        /*
         * For regular maps we do not accept either an input address or a
         * buffer_offset.
         */
        if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
            (buffer_offset || *map_addr)) {
                nvgpu_err(g,
                          "Regular map with addr/buf offset is not supported!");
                dma_buf_put(dmabuf);
                return -EINVAL;
        }

        /*
         * Map size is always buffer size for non fixed mappings. As such map
         * size should be left as zero by userspace for non-fixed maps.
         */
        if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
                nvgpu_err(g, "map_size && non-fixed-mapping!");
                dma_buf_put(dmabuf);
                return -EINVAL;
        }

        /*
         * Verify that we're not overflowing the buffer, i.e.
         * (buffer_offset + mapping_size) > dmabuf->size.
         *
         * Since buffer_offset + mapping_size could overflow, first check
         * that mapping_size <= dmabuf->size, at which point we can subtract
         * mapping_size from both sides for the final comparison.
         */
        if ((mapping_size > dmabuf->size) ||
            (buffer_offset > (dmabuf->size - mapping_size))) {
                nvgpu_err(g,
                          "buf size %llx < (offset(%llx) + map_size(%llx))",
                          (u64)dmabuf->size, buffer_offset, mapping_size);
                dma_buf_put(dmabuf);
                return -EINVAL;
        }

        err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
        if (err) {
                dma_buf_put(dmabuf);
                return err;
        }

        err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
                                 nvgpu_vm_translate_linux_flags(g, flags),
                                 page_size,
                                 compr_kind, incompr_kind,
                                 gk20a_mem_flag_none,
                                 buffer_offset,
                                 mapping_size,
                                 batch,
                                 &ret_va);

        if (!err)
                *map_addr = ret_va;
        else
                dma_buf_put(dmabuf);

        return err;
}

/*
 * This is the function call-back for freeing OS specific components of an
 * nvgpu_mapped_buf. This should most likely never be called outside of the
 * core MM framework!
 *
 * Note: the VM lock will be held.
 */
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
{
        struct vm_gk20a *vm = mapped_buffer->vm;

        gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
                       mapped_buffer->os_priv.attachment,
                       mapped_buffer->os_priv.sgt);

        dma_buf_put(mapped_buffer->os_priv.dmabuf);
}