Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/nvgpu_mem.c  613
1 file changed, 613 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
new file mode 100644
index 00000000..93925803
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
@@ -0,0 +1,613 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include <linux/vmalloc.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "platform_gk20a.h"

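/*
 * Resolve the physical address backing an SGL entry. On platforms that
 * provide a phys_addr() hook (e.g. virtualized configurations) the
 * intermediate physical address (IPA) reported by the scatterlist is
 * translated to the real physical address; otherwise the IPA is returned
 * as-is.
 */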
static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	u64 ipa = sg_phys((struct scatterlist *)sgl);

	if (platform->phys_addr)
		return platform->phys_addr(g, ipa);

	return ipa;
}

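/*
 * Make sure a SYSMEM nvgpu_mem has a kernel CPU mapping before it is accessed
 * through the nvgpu_mem_rd*()/nvgpu_mem_wr*() accessors. Buffers allocated
 * without NVGPU_DMA_NO_KERNEL_MAPPING (and coherent-sysmem buffers) already
 * carry a mapping made by the DMA API, so this is a no-op for them; otherwise
 * a writecombined vmap() is created. Returns -EBUSY on nested calls and
 * -ENOMEM if the vmap() fails.
 *
 * Typical usage (illustrative sketch only):
 *
 *	err = nvgpu_mem_begin(g, mem);
 *	if (err)
 *		return err;
 *	nvgpu_mem_wr32(g, mem, 0, data);
 *	nvgpu_mem_end(g, mem);
 */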
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;
	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
		PAGE_KERNEL :
		pgprot_writecombine(PAGE_KERNEL);

	if (mem->aperture != APERTURE_SYSMEM)
		return 0;

	/*
	 * WAR for bug 2040115: a coherent vmap() is already made for all
	 * sysmem buffers. The prot settings are left alone since eventually
	 * this should be deleted.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
	 * another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
			0, prot);

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM)
		return;

	/*
	 * WAR for bug 2040115: skip this since the map will be taken care of
	 * during the free in the DMA API.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
	 * already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}

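/*
 * Batch-read callback for nvgpu_pramin_access_batched(): copies 'words'
 * 32-bit words out of the BAR0 PRAMIN window starting at register offset
 * 'start' into the buffer pointed to by *arg, advancing *arg as it goes.
 */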
static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
				     u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

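/*
 * Read the w-th 32-bit word of an nvgpu_mem. SYSMEM buffers are read through
 * the kernel CPU mapping (see nvgpu_mem_begin()); VIDMEM buffers are read via
 * the PRAMIN window using the batched accessor above.
 */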
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

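/*
 * Bulk read: copy 'size' bytes starting at byte 'offset' of the nvgpu_mem
 * into 'dest'. Both 'offset' and 'size' must be 4-byte aligned.
 */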
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

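/*
 * Batch-write callback for nvgpu_pramin_access_batched(): writes 'words'
 * 32-bit words from *arg into the PRAMIN window starting at register offset
 * 'start', advancing *arg as it goes.
 */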
static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
				     u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

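/*
 * Write 'data' to the w-th 32-bit word of an nvgpu_mem. For VIDMEM the write
 * goes through the PRAMIN window and is followed by a wmb() unless
 * mem->skip_wmb is set.
 */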
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

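/*
 * Batch-fill callback for nvgpu_pramin_access_batched(): writes the single
 * 32-bit pattern **arg into 'words' consecutive words of the PRAMIN window
 * starting at register offset 'start'.
 */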
static void pramin_access_batch_set(struct gk20a *g, u32 start,
				    u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

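/*
 * Fill 'size' bytes of the nvgpu_mem at byte 'offset' with the byte value
 * 'c'. For VIDMEM the byte is replicated into a 32-bit pattern and written
 * through the PRAMIN batch-set helper above.
 */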
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
				dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
	    !nvgpu_iommuable(g))
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with > 1 chunks */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
}

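/*
 * Create a shadow nvgpu_mem in 'dest' that aliases 'nr_pages' pages of 'src'
 * starting at 'start_page'. No new memory is allocated: the page list and
 * (where available) the CPU mapping of 'src' are re-used, and a new SG table
 * is built for the sub-range. Only SYSMEM sources are supported.
 */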
/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
			      int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 *
	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
	 * re-use.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}

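/*
 * Build a SYSMEM nvgpu_mem in 'dest' from an existing array of struct page
 * pointers. The page list is copied, an SG table is constructed for it, and
 * the DMA address is forced to 0 so the resulting buffer bypasses the SMMU
 * when mapped into the GMMU.
 */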
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
				  struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
					 nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
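/*
 * Wrap a physically contiguous range starting at 'src_phys' and spanning
 * 'nr_pages' pages into a SYSMEM nvgpu_mem by converting it to a temporary
 * page array and handing it to __nvgpu_mem_create_from_pages().
 */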
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
				 u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}
#endif

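/*
 * nvgpu_sgt_ops implementation backed by Linux scatterlists. These thin
 * wrappers let common (OS-agnostic) nvgpu code walk a Linux SGL through the
 * generic nvgpu_sgt interface; see nvgpu_linux_sgt_ops below.
 */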
static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	return (u64)__nvgpu_sgl_phys(g, sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
					struct nvgpu_sgl *sgl,
					struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
				__nvgpu_sgl_phys(g, sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
					  struct nvgpu_sgt *sgt)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
		return false;
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};

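/*
 * For a VIDMEM buffer the scatterlist does not describe system pages; its DMA
 * address identifies the vidmem page allocation instead. Look that allocation
 * up and return its embedded nvgpu_sgt rather than wrapping the Linux SGL.
 */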
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

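/*
 * Create an nvgpu_sgt wrapper around a Linux sg_table. VIDMEM buffers return
 * the nvgpu_sgt embedded in their page allocation; SYSMEM buffers get a
 * freshly allocated nvgpu_sgt that points at the Linux SGL and uses
 * nvgpu_linux_sgt_ops.
 */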
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					    struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}