author     Alex Waterman <alexw@nvidia.com>                      2018-02-14 13:58:24 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2018-02-27 19:03:52 -0500
commit     3fdd8e38b280123fd13bcc4f3fd8928c15e94db6 (patch)
tree       19334bacb71dbd7f3e08bbf7ba363193e8380851
parent     1170687c33f7506f39aaf47acee5430233e3d1a8 (diff)
gpu: nvgpu: Use our own vmap() for coherent DMA buffers
For some reason the GPU does not like the mappings created by the DMA API
for coherent sysmem buffers, but a plain vmap() does seem to work. To work
around this, when coherent sysmem is in use, force the NO_KERNEL_MAPPING
flag on and then create a vmap() mapping in the nvgpu DMA API wrapper. The
rest of the driver is none the wiser but works as expected.

This problem is not yet understood; it is being tracked in bug 2040115.
Once the bug is understood, this WAR should either be confirmed as
necessary or reverted in favor of a proper fix.

Bug 2040115
JIRA EVLR-2333

Change-Id: Idae7a0c92441f0309df572ac18697af49bb6ff2b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1657568
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
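In rough terms the workaround is: allocate with DMA_ATTR_NO_KERNEL_MAPPING so
the DMA API skips creating its own kernel mapping, then vmap() the backing
pages ourselves and vunmap() them again on free. Below is a minimal,
hypothetical sketch of that pattern, assuming the post-4.8 dma_alloc_attrs()
signature; the war_buf/war_alloc/war_free names and the way the pages array
is obtained are illustrative only, and the real change lives in
nvgpu_dma_alloc_flags_sys() and nvgpu_dma_free_sys() in the diff below.

#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>

/* Hypothetical container for the pieces the WAR has to keep around. */
struct war_buf {
	void *dma_cookie;	/* opaque return value of dma_alloc_attrs() */
	void *cpu_va;		/* our own vmap()'d kernel mapping */
	dma_addr_t iova;
	size_t size;
};

static int war_alloc(struct device *dev, size_t size,
		     struct page **pages, unsigned int nr_pages,
		     struct war_buf *buf)
{
	/* Ask the DMA API not to create a kernel mapping of its own. */
	buf->dma_cookie = dma_alloc_attrs(dev, size, &buf->iova, GFP_KERNEL,
					  DMA_ATTR_NO_KERNEL_MAPPING);
	if (!buf->dma_cookie)
		return -ENOMEM;

	/* Build the CPU mapping ourselves over the backing pages. */
	buf->cpu_va = vmap(pages, nr_pages, 0, PAGE_KERNEL);
	if (!buf->cpu_va) {
		dma_free_attrs(dev, size, buf->dma_cookie, buf->iova,
			       DMA_ATTR_NO_KERNEL_MAPPING);
		return -ENOMEM;
	}

	buf->size = size;
	return 0;
}

static void war_free(struct device *dev, struct war_buf *buf)
{
	/* Free side of the WAR: drop our mapping before the DMA memory. */
	vunmap(buf->cpu_va);
	dma_free_attrs(dev, buf->size, buf->dma_cookie, buf->iova,
		       DMA_ATTR_NO_KERNEL_MAPPING);
}

In nvgpu itself the pages array comes from the driver's own bookkeeping
(mem->priv.pages), which is exactly what the vmap() call in the patch below
uses.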
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c        34
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c     12
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c  21
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci.c        13
4 files changed, 63 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index c13dae8b..81aebb7d 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -222,6 +222,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	void *alloc_ret;
 
 	/*
+	 * WAR for IO coherent chips: the DMA API does not seem to generate
+	 * mappings that work correctly. Unclear why - Bug ID: 2040115.
+	 *
+	 * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
+	 * and then make a vmap() ourselves.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
+
+	/*
 	 * Before the debug print so we see this in the total. But during
 	 * cleanup in the fail path this has to be subtracted.
 	 */
@@ -255,7 +265,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 				iova, size, flags);
 	}
 	if (err)
-		goto fail_free;
+		goto fail_free_dma;
+
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
+		mem->cpu_va = vmap(mem->priv.pages,
+				   size >> PAGE_SHIFT,
+				   0, PAGE_KERNEL);
+		if (!mem->cpu_va) {
+			err = -ENOMEM;
+			goto fail_free_sgt;
+		}
+	}
 
 	mem->aligned_size = size;
 	mem->aperture = APERTURE_SYSMEM;
@@ -265,12 +285,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 
 	return 0;
 
-fail_free:
-	g->dma_memory_used -= mem->aligned_size;
+fail_free_sgt:
+	nvgpu_free_sgtable(g, &mem->priv.sgt);
+fail_free_dma:
 	dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
 	mem->cpu_va = NULL;
 	mem->priv.sgt = NULL;
 	mem->size = 0;
+	g->dma_memory_used -= mem->aligned_size;
 	return err;
 }
 
@@ -466,6 +488,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
 	    !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
 	    (mem->cpu_va || mem->priv.pages)) {
+		/*
+		 * Free side of WAR for bug 2040115.
+		 */
+		if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+			vunmap(mem->cpu_va);
+
 		if (mem->priv.flags) {
 			NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
 
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 52348db0..741c86e7 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -1149,6 +1149,12 @@ static int gk20a_probe(struct platform_device *dev)
 	if (err)
 		goto return_err;
 
+	np = nvgpu_get_node(gk20a);
+	if (of_dma_is_coherent(np)) {
+		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
+		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
+	}
+
 	if (nvgpu_platform_is_simulation(gk20a))
 		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
 
@@ -1208,12 +1214,6 @@ static int gk20a_probe(struct platform_device *dev)
 
 	gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
 
-	np = nvgpu_get_node(gk20a);
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	return 0;
 
 return_err:
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index c859520d..69897694 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -45,6 +45,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 		return 0;
 
 	/*
+	 * WAR for bug 2040115: we already will always have a coherent vmap()
+	 * for all sysmem buffers. The prot settings are left alone since
+	 * eventually this should be deleted.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+		return 0;
+
+	/*
 	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
 	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
 	 * another CPU mapping.
@@ -74,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 		return;
 
 	/*
+	 * WAR for bug 2040115: skip this since the map will be taken care of
+	 * during the free in the DMA API.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+		return;
+
+	/*
 	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
 	 * already made by the DMA API.
 	 */
@@ -393,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
 
 	/*
 	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
+	 *
+	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
+	 * re-use.
 	 */
-	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
+	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
+	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
 		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
 
 	dest->priv.pages = src->priv.pages + start_page;
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 4ba839c4..973da9ca 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	platform->g = g;
 	l->dev = &pdev->dev;
 
+	np = nvgpu_get_node(g);
+	if (of_dma_is_coherent(np)) {
+		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
+		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
+	}
+
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 
 	g->mm.has_physical_mode = false;
 
-	np = nvgpu_get_node(g);
-
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	return 0;
 }
 