Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/dma.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c  640
1 file changed, 640 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
new file mode 100644
index 00000000..22f2cefb
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -0,0 +1,640 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
#include <linux/version.h>

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/lock.h>
#include <nvgpu/bug.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include "gk20a/gk20a.h"

#include "platform_gk20a.h"
#include "os_linux.h"

/*
 * Enough to hold all the possible flags in string form. When a new flag is
 * added it must be added here as well!!
 */
#define NVGPU_DMA_STR_SIZE \
        sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS READ_ONLY")

/*
 * The returned string is kmalloc()ed here but must be freed by the caller.
 */
static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
{
        char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
        int bytes_available = NVGPU_DMA_STR_SIZE;

        /*
         * Return the buffer as-is when the allocation fails or there are no
         * flags to print. Makes it easier on the calling code to just print
         * it instead of any if (NULL) type logic.
         */
        if (!buf || !flags)
                return buf;

#define APPEND_FLAG(flag, str_flag)                                     \
        do {                                                            \
                if (flags & flag) {                                     \
                        strncat(buf, str_flag, bytes_available);        \
                        bytes_available -= strlen(str_flag);            \
                }                                                       \
        } while (0)

        APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
        APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
        APPEND_FLAG(NVGPU_DMA_READ_ONLY, "READ_ONLY");
#undef APPEND_FLAG

        return buf;
}

/**
 * __dma_dbg - Debug print for DMA allocs and frees.
 *
 * @g - The GPU.
 * @size - The requested size of the alloc (size_t).
 * @flags - The flags (unsigned long).
 * @type - A string describing the type (e.g. sysmem or vidmem).
 * @what - A string with 'alloc' or 'free'.
 *
 * @flags is the set of DMA flags. If there are none, or it doesn't make sense
 * to print flags, just pass 0.
 *
 * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
 */
static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
                      const char *type, const char *what)
{
        char *flags_str = NULL;

        /*
         * Don't bother making the flags_str if debugging is
         * not enabled. This saves a malloc and a free.
         */
        if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
                return;

        flags_str = nvgpu_dma_flags_to_str(g, flags);

        __nvgpu_log_dbg(g, gpu_dbg_dma,
                        __func__, __LINE__,
                        "DMA %s: [%s] size=%-7zu "
                        "aligned=%-7zu total=%-10llukB %s",
                        what, type,
                        size, PAGE_ALIGN(size),
                        g->dma_memory_used >> 10,
                        flags_str);

        if (flags_str)
                nvgpu_kfree(g, flags_str);
}

#define dma_dbg_alloc(g, size, flags, type) \
        __dma_dbg(g, size, flags, type, "alloc")
#define dma_dbg_free(g, size, flags, type) \
        __dma_dbg(g, size, flags, type, "free")

/*
 * Logged once the DMA alloc or free has actually completed.
 */
#define __dma_dbg_done(g, size, type, what)             \
        nvgpu_log(g, gpu_dbg_dma,                       \
                  "DMA %s: [%s] size=%-7zu Done!",      \
                  what, type, size)

#define dma_dbg_alloc_done(g, size, type) \
        __dma_dbg_done(g, size, type, "alloc")
#define dma_dbg_free_done(g, size, type) \
        __dma_dbg_done(g, size, type, "free")

#if defined(CONFIG_GK20A_VIDMEM)
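/*
 * Allocate a chunk of vidmem from @allocator. When @at is non-zero the
 * allocation is placed at that fixed offset, otherwise the allocator picks
 * the address. Returns 0 on failure.
 */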
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
                             size_t size)
{
        u64 addr = 0;

        if (at)
                addr = nvgpu_alloc_fixed(allocator, at, size, 0);
        else
                addr = nvgpu_alloc(allocator, size);

        return addr;
}
#endif

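/*
 * Translate NVGPU_DMA_* flags into DMA API attributes. Kernels >= 4.9 pass
 * attributes as an unsigned long bitmask while older kernels use a struct
 * dma_attrs; ATTR_ARG() papers over that difference below.
 */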
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
                                     unsigned long flags)
#define ATTR_ARG(x) *x
#else
static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
                                     unsigned long flags)
#define ATTR_ARG(x) x
#endif
{
        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
        if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
                dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
        if (flags & NVGPU_DMA_READ_ONLY)
                dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
#undef ATTR_ARG
}

int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_flags(g, 0, size, mem);
}

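/*
 * When the GPU does not use unified memory (NVGPU_MM_UNIFIED_MEMORY is not
 * set) try vidmem first and fall back to sysmem if that fails; otherwise
 * allocate directly from sysmem.
 */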
int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
                          struct nvgpu_mem *mem)
{
        if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
                /*
                 * Force the no-kernel-mapping flag on because we don't support
                 * the lack of it for vidmem - the user should not care when
                 * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
                 * difference, the user should use the flag explicitly anyway.
                 *
                 * Incoming flags are ignored here, since bits other than the
                 * no-kernel-mapping flag are ignored by the vidmem mapping
                 * functions anyway.
                 */
                int err = nvgpu_dma_alloc_flags_vid(g,
                                NVGPU_DMA_NO_KERNEL_MAPPING,
                                size, mem);

                if (!err)
                        return 0;
                /*
                 * Fall back to sysmem (which may then also fail) in case
                 * vidmem is exhausted.
                 */
        }

        return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
}

int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
}

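/*
 * Allocate DMA-able system memory through the Linux DMA API and build an
 * sg_table describing it. With NVGPU_DMA_NO_KERNEL_MAPPING only a page list
 * is kept; otherwise the buffer gets a kernel mapping and is zeroed.
 */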
int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
                              size_t size, struct nvgpu_mem *mem)
{
        struct device *d = dev_from_gk20a(g);
        int err;
        dma_addr_t iova;
        DEFINE_DMA_ATTRS(dma_attrs);
        void *alloc_ret;

        /*
         * Count this allocation before the debug print so that it shows up
         * in the printed total. On the failure paths it has to be subtracted
         * back out.
         */
        g->dma_memory_used += PAGE_ALIGN(size);

        dma_dbg_alloc(g, size, flags, "sysmem");

        /*
         * Save the requested size; the actual allocation is page aligned.
         */
        mem->size = size;
        size = PAGE_ALIGN(size);

        nvgpu_dma_flags_to_attrs(&dma_attrs, flags);

        alloc_ret = dma_alloc_attrs(d, size, &iova, GFP_KERNEL,
                                    __DMA_ATTR(dma_attrs));
        if (!alloc_ret) {
                g->dma_memory_used -= size;
                return -ENOMEM;
        }

        if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
                mem->priv.pages = alloc_ret;
                err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
                                                   mem->priv.pages,
                                                   iova, size);
        } else {
                mem->cpu_va = alloc_ret;
                err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
                                              iova, size, flags);
                memset(mem->cpu_va, 0, size);
        }
        if (err)
                goto fail_free;

        mem->aligned_size = size;
        mem->aperture = APERTURE_SYSMEM;
        mem->priv.flags = flags;

        dma_dbg_alloc_done(g, mem->size, "sysmem");

        return 0;

fail_free:
        g->dma_memory_used -= size;
        dma_free_attrs(d, size, alloc_ret, iova, __DMA_ATTR(dma_attrs));
        mem->cpu_va = NULL;
        mem->priv.sgt = NULL;
        mem->size = 0;
        return err;
}

int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_flags_vid(g,
                        NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}

int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
                              size_t size, struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
}

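/*
 * Allocate vidmem, optionally at a fixed address (@at != 0). The allocation
 * is tracked with a single-entry sg_table whose sgl carries the vidmem page
 * alloc handle rather than a kernel page.
 */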
int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
                                 size_t size, struct nvgpu_mem *mem, u64 at)
{
#if defined(CONFIG_GK20A_VIDMEM)
        u64 addr;
        int err;
        struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
                &g->mm.vidmem.allocator :
                &g->mm.vidmem.bootstrap_allocator;
        int before_pending;

        dma_dbg_alloc(g, size, flags, "vidmem");

        mem->size = size;
        size = PAGE_ALIGN(size);

        if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
                return -ENOSYS;

        /*
         * Our own allocator doesn't have any flags yet, and we can't
         * kernel-map these, so require explicit flags.
         */
        WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);

        nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
        before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
        addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
        nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
        if (!addr) {
                /*
                 * If memory is known to be freed soon, let the user know that
                 * it may be available after a while.
                 */
                if (before_pending)
                        return -EAGAIN;
                else
                        return -ENOMEM;
        }

        if (at)
                mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;

        mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
        if (!mem->priv.sgt) {
                err = -ENOMEM;
                goto fail_physfree;
        }

        err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
        if (err)
                goto fail_kfree;

        nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
        sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);

        mem->aligned_size = size;
        mem->aperture = APERTURE_VIDMEM;
        mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
        mem->allocator = vidmem_alloc;
        mem->priv.flags = flags;

        nvgpu_init_list_node(&mem->clear_list_entry);

        dma_dbg_alloc_done(g, mem->size, "vidmem");

        return 0;

fail_kfree:
        nvgpu_kfree(g, mem->priv.sgt);
fail_physfree:
        nvgpu_free(&g->mm.vidmem.allocator, addr);
        mem->size = 0;
        return err;
#else
        return -ENOSYS;
#endif
}

int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
                        struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
}

int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
                              size_t size, struct nvgpu_mem *mem)
{
        if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
                /*
                 * Force the no-kernel-mapping flag on because we don't support
                 * the lack of it for vidmem - the user should not care when
                 * using nvgpu_dma_alloc_map and it's vidmem, or if there's a
                 * difference, the user should use the flag explicitly anyway.
                 */
                int err = nvgpu_dma_alloc_map_flags_vid(vm,
                                flags | NVGPU_DMA_NO_KERNEL_MAPPING,
                                size, mem);

                if (!err)
                        return 0;
                /*
                 * Fall back to sysmem (which may then also fail) in case
                 * vidmem is exhausted.
                 */
        }

        return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
}

int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
                            struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
}

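/*
 * Allocate sysmem and immediately map it into @vm. On mapping failure the
 * backing allocation is freed again.
 */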
int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
                                  size_t size, struct nvgpu_mem *mem)
{
        int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);

        if (err)
                return err;

        mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
                                     gk20a_mem_flag_none, false,
                                     mem->aperture);
        if (!mem->gpu_va) {
                err = -ENOMEM;
                goto fail_free;
        }

        return 0;

fail_free:
        nvgpu_dma_free(vm->mm->g, mem);
        return err;
}

int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
                            struct nvgpu_mem *mem)
{
        return nvgpu_dma_alloc_map_flags_vid(vm,
                        NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}

int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
                                  size_t size, struct nvgpu_mem *mem)
{
        int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);

        if (err)
                return err;

        mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
                                     gk20a_mem_flag_none, false,
                                     mem->aperture);
        if (!mem->gpu_va) {
                err = -ENOMEM;
                goto fail_free;
        }

        return 0;

fail_free:
        nvgpu_dma_free(vm->mm->g, mem);
        return err;
}

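/*
 * Free a sysmem allocation: release the DMA memory (unless this nvgpu_mem is
 * a shadow copy or was not allocated through the DMA API), free the page
 * list for the no-DMA case, and drop the sg_table.
 */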
static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
        struct device *d = dev_from_gk20a(g);

        g->dma_memory_used -= mem->aligned_size;

        dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");

        if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
            !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
            (mem->cpu_va || mem->priv.pages)) {
                if (mem->priv.flags) {
                        DEFINE_DMA_ATTRS(dma_attrs);

                        nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);

                        if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
                                dma_free_attrs(d, mem->aligned_size,
                                        mem->priv.pages,
                                        sg_dma_address(mem->priv.sgt->sgl),
                                        __DMA_ATTR(dma_attrs));
                        } else {
                                dma_free_attrs(d, mem->aligned_size,
                                        mem->cpu_va,
                                        sg_dma_address(mem->priv.sgt->sgl),
                                        __DMA_ATTR(dma_attrs));
                        }
                } else {
                        dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
                                          sg_dma_address(mem->priv.sgt->sgl));
                }
                mem->cpu_va = NULL;
                mem->priv.pages = NULL;
        }

        /*
         * When this flag is set we expect that pages is still populated but
         * not by the DMA API.
         */
        if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
                nvgpu_kfree(g, mem->priv.pages);

        if (mem->priv.sgt)
                nvgpu_free_sgtable(g, &mem->priv.sgt);

        dma_dbg_free_done(g, mem->size, "sysmem");

        mem->size = 0;
        mem->aligned_size = 0;
        mem->aperture = APERTURE_INVALID;
}

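/*
 * Free a vidmem allocation. User memory is queued for asynchronous clearing
 * before being released; kernel allocations are cleared inline and returned
 * to the vidmem allocator immediately.
 */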
static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
        size_t mem_size = mem->size;

        dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");

        /* Sanity check - only this flag is supported when allocating. */
        WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);

        if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
                int err = nvgpu_vidmem_clear_list_enqueue(g, mem);

                /*
                 * If there's an error here then that means we can't clear the
                 * vidmem. That's too bad; however, we still own the nvgpu_mem
                 * buf so we have to free that.
                 *
                 * We don't need to worry about the vidmem allocator itself
                 * since when that gets cleaned up in the driver shutdown path
                 * all the outstanding allocs are force freed.
                 */
                if (err)
                        nvgpu_kfree(g, mem);
        } else {
                nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
                nvgpu_free(mem->allocator,
                           (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
                nvgpu_free_sgtable(g, &mem->priv.sgt);

                mem->size = 0;
                mem->aligned_size = 0;
                mem->aperture = APERTURE_INVALID;
        }

        dma_dbg_free_done(g, mem_size, "vidmem");
#endif
}

void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
{
        switch (mem->aperture) {
        case APERTURE_SYSMEM:
                return nvgpu_dma_free_sys(g, mem);
        case APERTURE_VIDMEM:
                return nvgpu_dma_free_vid(g, mem);
        default:
                break; /* like free() on "null" memory */
        }
}

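/*
 * Unmap @mem from @vm (if it was mapped) and then free the backing memory.
 */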
void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
        if (mem->gpu_va)
                nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
        mem->gpu_va = 0;

        nvgpu_dma_free(vm->mm->g, mem);
}

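/*
 * Build an sg_table for a buffer allocated with dma_alloc_attrs(). The DMA
 * address of the first entry is set to @iova by hand since the buffer is
 * already IOVA-mapped and will not go through dma_map_sg().
 */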
int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
                            void *cpuva, u64 iova, size_t size,
                            unsigned long flags)
{
        int err = 0;
        struct sg_table *tbl;
        DEFINE_DMA_ATTRS(dma_attrs);

        tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
        if (!tbl) {
                err = -ENOMEM;
                goto fail;
        }

        nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
        err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
                                    size, __DMA_ATTR(dma_attrs));
        if (err)
                goto fail;

        sg_dma_address(tbl->sgl) = iova;
        *sgt = tbl;

        return 0;

fail:
        if (tbl)
                nvgpu_kfree(g, tbl);

        return err;
}

int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
                      void *cpuva, u64 iova, size_t size)
{
        return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
}

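/*
 * Same as nvgpu_get_sgtable() but for allocations that only have a page
 * list (i.e. NVGPU_DMA_NO_KERNEL_MAPPING allocations).
 */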
int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
                                 struct page **pages, u64 iova, size_t size)
{
        int err = 0;
        struct sg_table *tbl;

        tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
        if (!tbl) {
                err = -ENOMEM;
                goto fail;
        }

        err = sg_alloc_table_from_pages(tbl, pages,
                                        DIV_ROUND_UP(size, PAGE_SIZE),
                                        0, size, GFP_KERNEL);
        if (err)
                goto fail;

        sg_dma_address(tbl->sgl) = iova;
        *sgt = tbl;

        return 0;

fail:
        if (tbl)
                nvgpu_kfree(g, tbl);

        return err;
}

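/*
 * Free an sg_table created by one of the nvgpu_get_sgtable*() helpers and
 * clear the caller's pointer.
 */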
void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
{
        sg_free_table(*sgt);
        nvgpu_kfree(g, *sgt);
        *sgt = NULL;
}

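/*
 * Report whether the GPU device sits behind an IOMMU.
 */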
bool nvgpu_iommuable(struct gk20a *g)
{
        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

        return device_is_iommuable(l->dev);
}