Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/nvgpu_mem.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/nvgpu_mem.c	594
1 file changed, 594 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
new file mode 100644
index 00000000..2587d56a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -0,0 +1,594 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		u32 sysmem_mask, u32 vidmem_mask)
{
	switch (aperture) {
	case APERTURE_SYSMEM:
		/* some igpus consider system memory vidmem */
		return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
			? sysmem_mask : vidmem_mask;
	case APERTURE_VIDMEM:
		/* for dgpus only */
		return vidmem_mask;
	case APERTURE_INVALID:
		WARN_ON("Bad aperture");
	}
	return 0;
}

u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		u32 sysmem_mask, u32 vidmem_mask)
{
	return __nvgpu_aperture_mask(g, mem->aperture,
			sysmem_mask, vidmem_mask);
}
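
/*
 * Usage sketch: callers OR the helper's result into a HW word to select the
 * right aperture field, e.g. when pointing an instance block at a page
 * directory. The ram_in_* accessors here are illustrative stand-ins for the
 * generated hw headers:
 *
 *	target = nvgpu_aperture_mask(g, inst_block,
 *			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
 *			ram_in_page_dir_base_target_vid_mem_f());
 */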

int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;

	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations
	 * that don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to
	 * make another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
			0, pgprot_writecombine(PAGE_KERNEL));

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU
	 * mapping already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}
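
/*
 * Sketch of the expected begin/end bracketing for a SYSMEM buffer that was
 * allocated with NVGPU_DMA_NO_KERNEL_MAPPING (error handling trimmed):
 *
 *	if (nvgpu_mem_begin(g, mem))
 *		return -ENOMEM;
 *
 *	val = nvgpu_mem_rd32(g, mem, 0);
 *	nvgpu_mem_wr32(g, mem, 1, 0xcafe);
 *
 *	nvgpu_mem_end(g, mem);
 */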

static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

static void pramin_access_batch_set(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x [times %d]",
					dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}
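
/*
 * The byte replication in nvgpu_memset()'s vidmem path exists because the
 * PRAMIN batch callbacks above only issue whole 32-bit writes; e.g. c = 0xab
 * is widened to repeat_value = 0xabababab so each word write sets four bytes
 * at once. A hypothetical zero-fill of an entire buffer would be:
 *
 *	nvgpu_memset(g, mem, 0, 0, mem->size);
 */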

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);

	if (!device_is_iommuable(l->dev))
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with more
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous, so the base address isn't very useful.
 * The same is true for SYSMEM if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous SYSMEM buffers _if_
 * there's an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with more than one chunk. */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return sg_phys(mem->priv.sgt->sgl);
}

/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *dest, struct nvgpu_mem *src,
		int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}
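
/*
 * Sketch of carving a two-page window out of an existing SYSMEM buffer;
 * 'src_mem' is a hypothetical buffer of at least six pages. The window
 * shares pages with 'src_mem'; only the new SG table belongs to 'window',
 * and freeing it correctly is the caller's job, per the warning above:
 *
 *	struct nvgpu_mem window;
 *
 *	if (nvgpu_mem_create_from_mem(g, &window, &src_mem, 4, 2))
 *		return -EINVAL;
 */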

int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
		struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
			nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
		u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}
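
/*
 * Sketch of wrapping a fixed physical region (a carveout, for example) in an
 * nvgpu_mem; the base address here is made up:
 *
 *	struct nvgpu_mem sysmem;
 *
 *	if (__nvgpu_mem_create_from_phys(g, &sysmem, 0x80000000ULL, 16))
 *		return -ENOMEM;
 */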

static void *nvgpu_mem_linux_sgl_next(void *sgl)
{
	return sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
{
	return (u64)sg_phys((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(void *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
		struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
				sg_phys((struct scatterlist *)sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
		struct nvgpu_sgt *sgt)
{
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};

static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = sgt->sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}
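
/*
 * Sketch of walking a buffer chunk-by-chunk through the abstract SGT ops,
 * without caring whether the backing is a Linux SGL or a vidmem page alloc:
 *
 *	struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
 *	void *sgl;
 *
 *	if (!sgt)
 *		return -ENOMEM;
 *
 *	for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
 *		nvgpu_log(g, gpu_dbg_sgl, "chunk: 0x%llx + 0x%llx",
 *				sgt->ops->sgl_phys(sgl),
 *				sgt->ops->sgl_length(sgl));
 *
 *	sgt->ops->sgt_free(g, sgt);
 */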