Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c  594
1 file changed, 594 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
new file mode 100644
index 00000000..2587d56a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -0,0 +1,594 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

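/*
 * Translate an nvgpu_aperture into the caller-supplied HW mask: SYSMEM
 * buffers get sysmem_mask, VIDMEM buffers get vidmem_mask. On iGPUs that do
 * not set NVGPU_MM_HONORS_APERTURE, SYSMEM is reported with the vidmem mask.
 */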
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		u32 sysmem_mask, u32 vidmem_mask)
{
	switch (aperture) {
	case APERTURE_SYSMEM:
		/* some igpus consider system memory vidmem */
		return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
			? sysmem_mask : vidmem_mask;
	case APERTURE_VIDMEM:
		/* for dgpus only */
		return vidmem_mask;
	case APERTURE_INVALID:
		WARN_ON("Bad aperture");
	}
	return 0;
}

u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		u32 sysmem_mask, u32 vidmem_mask)
{
	return __nvgpu_aperture_mask(g, mem->aperture,
			sysmem_mask, vidmem_mask);
}

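/*
 * Make a SYSMEM buffer CPU-accessible before using nvgpu_mem_rd*()/wr*().
 * Only buffers allocated with NVGPU_DMA_NO_KERNEL_MAPPING need this; all
 * other buffers are either already kernel-mapped or accessed via PRAMIN.
 * Pair every successful call with nvgpu_mem_end(). Illustrative usage
 * (error handling elided):
 *
 *	if (!nvgpu_mem_begin(g, mem)) {
 *		nvgpu_mem_wr32(g, mem, 0, 0xcafe);
 *		nvgpu_mem_end(g, mem);
 *	}
 */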
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;

	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
	 * another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
			0, pgprot_writecombine(PAGE_KERNEL));

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

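/* Tear down the temporary CPU mapping created by nvgpu_mem_begin(). */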
void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
	 * already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}

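/*
 * PRAMIN batch callback: read @words 32-bit words from the PRAMIN window at
 * register offset @start into the buffer *@arg, advancing *@arg past the
 * data that was read.
 */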
static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

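/*
 * Read one 32-bit word at word index @w (i.e. byte offset w * 4). SYSMEM is
 * read through the CPU mapping; VIDMEM (or force_pramin) goes through the
 * batched PRAMIN path.
 */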
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

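/* Byte-offset variant of nvgpu_mem_rd32(); @offset must be word aligned. */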
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

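/*
 * PRAMIN batch callback: write @words 32-bit words from *@arg into the
 * PRAMIN window at register offset @start, advancing *@arg as the data is
 * consumed.
 */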
static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

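/*
 * Write one 32-bit word at word index @w, mirroring nvgpu_mem_rd32(). VIDMEM
 * writes go through PRAMIN and are followed by a wmb() unless mem->skip_wmb
 * is set.
 */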
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

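/* Byte-offset variant of nvgpu_mem_wr32(); @offset must be word aligned. */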
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

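/*
 * PRAMIN batch callback: fill @words 32-bit words of the PRAMIN window at
 * register offset @start with the repeat value **@arg. Unlike the rd/wr
 * callbacks, the argument pointer is not advanced.
 */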
static void pramin_access_batch_set(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

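/*
 * memset() analogue for nvgpu_mem buffers: fill @size bytes at @offset with
 * the byte value @c (only the low byte of @c is used). @offset and @size
 * must be word aligned.
 */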
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x [times %d]",
					dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);

	if (!device_is_iommuable(l->dev))
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous SYSMEM buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with > 1 chunks */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return sg_phys(mem->priv.sgt->sgl);
}

/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *dest, struct nvgpu_mem *src,
		int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}

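/*
 * Wrap an existing array of kernel pages in a SYSMEM nvgpu_mem. The page
 * array is copied, so the caller keeps ownership of @pages. The resulting
 * buffer is flagged __NVGPU_MEM_FLAG_NO_DMA and its DMA address is forced to
 * 0 so that mappings bypass the SMMU.
 */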
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
		struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
				nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

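/*
 * Same as __nvgpu_mem_create_from_pages(), but starting from a physical
 * address: wraps @nr_pages pages beginning at @src_phys.
 */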
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
		u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}

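/*
 * nvgpu_sgt_ops implementation backed by a Linux scatterlist: each callback
 * simply forwards to the corresponding sg_*() accessor on the wrapped
 * struct scatterlist.
 */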
static void *nvgpu_mem_linux_sgl_next(void *sgl)
{
	return sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
{
	return (u64)sg_phys((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(void *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
		struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
				sg_phys((struct scatterlist *)sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
		struct nvgpu_sgt *sgt)
{
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};

static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

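/*
 * Wrap a Linux sg_table in an nvgpu_sgt. VIDMEM buffers carry a vidmem page
 * alloc handle in the DMA address, so for those the allocator's own SGT is
 * returned instead of allocating a new wrapper.
 */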
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = sgt->sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

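/* Convenience wrapper: create an nvgpu_sgt from the SGT embedded in @mem. */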
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}