Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/nvgpu_mem.c | 613
1 file changed, 613 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
new file mode 100644
index 00000000..93925803
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
@@ -0,0 +1,613 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include <linux/vmalloc.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "platform_gk20a.h"

static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
        struct device *dev = dev_from_gk20a(g);
        struct gk20a_platform *platform = gk20a_get_platform(dev);
        u64 ipa = sg_phys((struct scatterlist *)sgl);

        if (platform->phys_addr)
                return platform->phys_addr(g, ipa);

        return ipa;
}
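
/*
 * Editorial sketch, not part of this change: the per-platform phys_addr()
 * hook above lets a virtualized platform translate the intermediate
 * physical address (IPA) seen by the guest into a machine address, while
 * bare-metal platforms leave the hook NULL and use sg_phys() directly.
 * Everything named "EXAMPLE_*"/"example_*" here is hypothetical.
 */
#define EXAMPLE_IPA_TO_PA_OFFSET 0x100000000ULL /* hypothetical window base */

static u64 example_phys_addr_hook(struct gk20a *g, u64 ipa)
{
        /* Pretend guest IPAs sit at a fixed offset from machine PAs. */
        return ipa + EXAMPLE_IPA_TO_PA_OFFSET;
}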

int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
        void *cpu_va;
        pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
                PAGE_KERNEL :
                pgprot_writecombine(PAGE_KERNEL);

        if (mem->aperture != APERTURE_SYSMEM)
                return 0;

        /*
         * WAR for bug 2040115: we already always have a coherent vmap()
         * for all sysmem buffers. The prot settings are left alone since
         * eventually this should be deleted.
         */
        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
                return 0;

        /*
         * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
         * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
         * another CPU mapping.
         */
        if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
                return 0;

        if (WARN_ON(mem->cpu_va)) {
                nvgpu_warn(g, "nested");
                return -EBUSY;
        }

        cpu_va = vmap(mem->priv.pages,
                        PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
                        0, prot);

        if (WARN_ON(!cpu_va))
                return -ENOMEM;

        mem->cpu_va = cpu_va;
        return 0;
}

void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
        if (mem->aperture != APERTURE_SYSMEM)
                return;

        /*
         * WAR for bug 2040115: skip this since the map will be taken care of
         * during the free in the DMA API.
         */
        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
                return;

        /*
         * Similar to nvgpu_mem_begin(), we don't need to unmap the CPU mapping
         * already made by the DMA API.
         */
        if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
                return;

        vunmap(mem->cpu_va);
        mem->cpu_va = NULL;
}
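
/*
 * Editorial usage sketch, not part of this change: CPU accesses to a
 * SYSMEM nvgpu_mem are bracketed by nvgpu_mem_begin()/nvgpu_mem_end().
 * The buffer is assumed to come from the DMA API wrappers in
 * <nvgpu/dma.h>; example_cpu_access() is hypothetical.
 */
static int example_cpu_access(struct gk20a *g, struct nvgpu_mem *mem)
{
        int err;

        err = nvgpu_mem_begin(g, mem);  /* vmap()s the pages if required */
        if (err)
                return err;

        nvgpu_mem_wr32(g, mem, 0, 0xdeadbeef);

        nvgpu_mem_end(g, mem);          /* undoes the vmap() made above */
        return 0;
}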

static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
                u32 words, u32 **arg)
{
        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
        u32 r = start, *dest_u32 = *arg;

        if (!l->regs) {
                __gk20a_warn_on_no_regs();
                return;
        }

        while (words--) {
                *dest_u32++ = gk20a_readl(g, r);
                r += sizeof(u32);
        }

        *arg = dest_u32;
}

u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
        u32 data = 0;

        if (mem->aperture == APERTURE_SYSMEM) {
                u32 *ptr = mem->cpu_va;

                WARN_ON(!ptr);
                data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
                nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
        } else if (mem->aperture == APERTURE_VIDMEM) {
                u32 value;
                u32 *p = &value;

                nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
                                sizeof(u32), pramin_access_batch_rd_n, &p);

                data = value;

        } else {
                WARN_ON("Accessing unallocated nvgpu_mem");
        }

        return data;
}

u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
        WARN_ON(offset & 3);
        return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
                u32 offset, void *dest, u32 size)
{
        WARN_ON(offset & 3);
        WARN_ON(size & 3);

        if (mem->aperture == APERTURE_SYSMEM) {
                u8 *src = (u8 *)mem->cpu_va + offset;

                WARN_ON(!mem->cpu_va);
                memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
                if (size)
                        nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
                                        src, *dest, size);
#endif
        } else if (mem->aperture == APERTURE_VIDMEM) {
                u32 *dest_u32 = dest;

                nvgpu_pramin_access_batched(g, mem, offset, size,
                                pramin_access_batch_rd_n, &dest_u32);
        } else {
                WARN_ON("Accessing unallocated nvgpu_mem");
        }
}
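
/*
 * Editorial sketch, not part of this change: nvgpu_mem_rd32() takes a
 * 32-bit word index while nvgpu_mem_rd() takes a byte offset, so the two
 * reads below return the same word. example_read_same_word() is
 * hypothetical.
 */
static u32 example_read_same_word(struct gk20a *g, struct nvgpu_mem *mem)
{
        u32 a = nvgpu_mem_rd32(g, mem, 2);      /* word index 2 */
        u32 b = nvgpu_mem_rd(g, mem, 8);        /* byte offset 2 * sizeof(u32) */

        WARN_ON(a != b);
        return a;
}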

static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
                u32 words, u32 **arg)
{
        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
        u32 r = start, *src_u32 = *arg;

        if (!l->regs) {
                __gk20a_warn_on_no_regs();
                return;
        }

        while (words--) {
                writel_relaxed(*src_u32++, l->regs + r);
                r += sizeof(u32);
        }

        *arg = src_u32;
}

void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
        if (mem->aperture == APERTURE_SYSMEM) {
                u32 *ptr = mem->cpu_va;

                WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
                nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
                ptr[w] = data;
        } else if (mem->aperture == APERTURE_VIDMEM) {
                u32 value = data;
                u32 *p = &value;

                nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
                                sizeof(u32), pramin_access_batch_wr_n, &p);
                if (!mem->skip_wmb)
                        wmb();
        } else {
                WARN_ON("Accessing unallocated nvgpu_mem");
        }
}

void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
        WARN_ON(offset & 3);
        nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
                void *src, u32 size)
{
        WARN_ON(offset & 3);
        WARN_ON(size & 3);

        if (mem->aperture == APERTURE_SYSMEM) {
                u8 *dest = (u8 *)mem->cpu_va + offset;

                WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
                if (size)
                        nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
                                        dest, *src, size);
#endif
                memcpy(dest, src, size);
        } else if (mem->aperture == APERTURE_VIDMEM) {
                u32 *src_u32 = src;

                nvgpu_pramin_access_batched(g, mem, offset, size,
                                pramin_access_batch_wr_n, &src_u32);
                if (!mem->skip_wmb)
                        wmb();
        } else {
                WARN_ON("Accessing unallocated nvgpu_mem");
        }
}

static void pramin_access_batch_set(struct gk20a *g, u32 start,
                u32 words, u32 **arg)
{
        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
        u32 r = start, repeat = **arg;

        if (!l->regs) {
                __gk20a_warn_on_no_regs();
                return;
        }

        while (words--) {
                writel_relaxed(repeat, l->regs + r);
                r += sizeof(u32);
        }
}

void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
                u32 c, u32 size)
{
        WARN_ON(offset & 3);
        WARN_ON(size & 3);
        WARN_ON(c & ~0xff);

        c &= 0xff;

        if (mem->aperture == APERTURE_SYSMEM) {
                u8 *dest = (u8 *)mem->cpu_va + offset;

                WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
                if (size)
                        nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
                                        dest, c, size);
#endif
                memset(dest, c, size);
        } else if (mem->aperture == APERTURE_VIDMEM) {
                u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
                u32 *p = &repeat_value;

                nvgpu_pramin_access_batched(g, mem, offset, size,
                                pramin_access_batch_set, &p);
                if (!mem->skip_wmb)
                        wmb();
        } else {
                WARN_ON("Accessing unallocated nvgpu_mem");
        }
}
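
/*
 * Editorial sketch, not part of this change: nvgpu_memset() takes a byte
 * value, which for VIDMEM is replicated into a 32-bit pattern above
 * (0xAB becomes 0xABABABAB) because PRAMIN is written a word at a time.
 * Offset and size must be word aligned. example_clear() is hypothetical.
 */
static void example_clear(struct gk20a *g, struct nvgpu_mem *mem)
{
        /* Zero the first 4KB of the buffer. */
        nvgpu_memset(g, mem, 0, 0x00, 4096);
}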

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
        if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
            !nvgpu_iommuable(g))
                return g->ops.mm.gpu_phys_addr(g, NULL,
                                __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

        if (sg_dma_address(sgl) == 0)
                return g->ops.mm.gpu_phys_addr(g, NULL,
                                __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

        if (sg_dma_address(sgl) == DMA_ERROR_CODE)
                return 0;

        return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
        return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with more
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
        struct nvgpu_page_alloc *alloc;

        if (mem->aperture == APERTURE_SYSMEM)
                return nvgpu_mem_get_addr_sysmem(g, mem);

        /*
         * Otherwise get the vidmem address.
         */
        alloc = mem->vidmem_alloc;

        /* This API should not be used with more than one chunk. */
        WARN_ON(alloc->nr_chunks != 1);

        return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
        /*
         * For a VIDMEM buf, this is identical to simply get_addr() so just
         * fall back to that.
         */
        if (mem->aperture == APERTURE_VIDMEM)
                return nvgpu_mem_get_addr(g, mem);

        return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
}

/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
                struct nvgpu_mem *dest, struct nvgpu_mem *src,
                int start_page, int nr_pages)
{
        int ret;
        u64 start = start_page * PAGE_SIZE;
        u64 size = nr_pages * PAGE_SIZE;
        dma_addr_t new_iova;

        if (src->aperture != APERTURE_SYSMEM)
                return -EINVAL;

        /* Some silly things a caller might do... */
        if (size > src->size)
                return -EINVAL;
        if ((start + size) > src->size)
                return -EINVAL;

        dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
        dest->aperture = src->aperture;
        dest->skip_wmb = src->skip_wmb;
        dest->size = size;

        /*
         * Re-use the CPU mapping only if the mapping was made by the DMA API.
         *
         * Bug 2040115: the DMA API wrapper makes the mapping that we should
         * re-use.
         */
        if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
            nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
                dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

        dest->priv.pages = src->priv.pages + start_page;
        dest->priv.flags = src->priv.flags;

        new_iova = sg_dma_address(src->priv.sgt->sgl) ?
                sg_dma_address(src->priv.sgt->sgl) + start : 0;

        /*
         * Make a new SG table that is based only on the subset of pages that
         * is passed to us. This table gets freed by the dma free routines.
         */
        if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
                ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
                                src->priv.pages + start_page,
                                new_iova, size);
        else
                ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
                                new_iova, size);

        return ret;
}
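
/*
 * Editorial usage sketch, not part of this change: carve a one-page
 * window out of an existing SYSMEM buffer. Per the comment above, the
 * caller owns the resulting nvgpu_mem; nvgpu_dma_free() from
 * <nvgpu/dma.h> is assumed to be the matching release path, since the
 * new SG table is freed by the DMA free routines. example_sub_buffer()
 * is hypothetical.
 */
static int example_sub_buffer(struct gk20a *g, struct nvgpu_mem *src)
{
        struct nvgpu_mem window = { };
        int err;

        /* One page starting at page index 1 of src. */
        err = nvgpu_mem_create_from_mem(g, &window, src, 1, 1);
        if (err)
                return err;

        /* ... use window like any other SYSMEM nvgpu_mem ... */

        nvgpu_dma_free(g, &window);
        return 0;
}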

int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
                struct page **pages, int nr_pages)
{
        struct sg_table *sgt;
        struct page **our_pages =
                nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

        if (!our_pages)
                return -ENOMEM;

        memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

        if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
                                nr_pages * PAGE_SIZE)) {
                nvgpu_kfree(g, our_pages);
                return -ENOMEM;
        }

        /*
         * If we are making an SGT from physical pages we can be reasonably
         * certain that this should bypass the SMMU - thus we set the DMA (aka
         * IOVA) address to 0. This tells the GMMU mapping code to not make a
         * mapping directed to the SMMU.
         */
        sg_dma_address(sgt->sgl) = 0;

        dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
        dest->aperture = APERTURE_SYSMEM;
        dest->skip_wmb = 0;
        dest->size = PAGE_SIZE * nr_pages;

        dest->priv.flags = 0;
        dest->priv.pages = our_pages;
        dest->priv.sgt = sgt;

        return 0;
}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
                u64 src_phys, int nr_pages)
{
        struct page **pages =
                nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
        int i, ret = 0;

        if (!pages)
                return -ENOMEM;

        for (i = 0; i < nr_pages; i++)
                pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

        ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
        nvgpu_kfree(g, pages);

        return ret;
}
#endif

static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
        return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
        return (u64)__nvgpu_sgl_phys(g, sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
        return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
        return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
                struct nvgpu_sgl *sgl,
                struct nvgpu_gmmu_attrs *attrs)
{
        if (sg_dma_address((struct scatterlist *)sgl) == 0)
                return g->ops.mm.gpu_phys_addr(g, attrs,
                                __nvgpu_sgl_phys(g, sgl));

        if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
                return 0;

        return nvgpu_mem_iommu_translate(g,
                        sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
                struct nvgpu_sgt *sgt)
{
        if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
                return false;
        return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
        /*
         * Free this SGT. All we do is free the passed SGT. The actual Linux
         * SGT/SGL needs to be freed separately.
         */
        nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
        .sgl_next = nvgpu_mem_linux_sgl_next,
        .sgl_phys = nvgpu_mem_linux_sgl_phys,
        .sgl_dma = nvgpu_mem_linux_sgl_dma,
        .sgl_length = nvgpu_mem_linux_sgl_length,
        .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
        .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
        .sgt_free = nvgpu_mem_linux_sgl_free,
};

static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
        struct gk20a *g,
        struct scatterlist *linux_sgl)
{
        struct nvgpu_page_alloc *vidmem_alloc;

        vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
        if (!vidmem_alloc)
                return NULL;

        return &vidmem_alloc->sgt;
}

struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
        struct nvgpu_sgt *nvgpu_sgt;
        struct scatterlist *linux_sgl = sgt->sgl;

        if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
                return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

        nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
        if (!nvgpu_sgt)
                return NULL;

        nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

        nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
        nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

        return nvgpu_sgt;
}

struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
                struct nvgpu_mem *mem)
{
        return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}
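
/*
 * Editorial sketch, not part of this change: a consumer can walk the
 * abstract SGT through the ops table defined above, which is roughly how
 * the common GMMU code iterates buffers without knowing about Linux
 * scatterlists. example_walk_sgt() is hypothetical.
 */
static void example_walk_sgt(struct gk20a *g, struct nvgpu_mem *mem)
{
        struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
        struct nvgpu_sgl *sgl;

        if (!sgt)
                return;

        for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
                nvgpu_log(g, gpu_dbg_sgl, "chunk: phys=0x%llx len=0x%llx",
                                sgt->ops->sgl_phys(g, sgl),
                                sgt->ops->sgl_length(sgl));

        sgt->ops->sgt_free(g, sgt);
}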