diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-02-14 13:58:24 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-02-27 19:03:52 -0500 |
commit | 3fdd8e38b280123fd13bcc4f3fd8928c15e94db6 (patch) | |
tree | 19334bacb71dbd7f3e08bbf7ba363193e8380851 /drivers | |
parent | 1170687c33f7506f39aaf47acee5430233e3d1a8 (diff) |
gpu: nvgpu: Use our own vmap() for coherent DMA buffers
For some reason the GPU does not like the mappings created by the
DMA API for coherent sysmem buffers. But a plain vmap() does seem
to work. To work around this, when we are using coherent sysmem,
force the NO_KERNEL_MAPPING flag on and then make a vmap() in
the nvgpu DMA API wrapper. The rest of the driver will be none the
wiser but will work as expected.
This problem is not understood yet, but it is being tracked in bug
2040115. Once this bug is understood, this WAR (workaround) should either
be confirmed as necessary or reverted in favor of an appropriate fix.
Bug 2040115
JIRA EVLR-2333
Change-Id: Idae7a0c92441f0309df572ac18697af49bb6ff2b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1657568
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dma.c | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 21 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/pci.c | 13 |
4 files changed, 63 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index c13dae8b..81aebb7d 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c | |||
@@ -222,6 +222,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
222 | void *alloc_ret; | 222 | void *alloc_ret; |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
226 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
227 | * | ||
228 | * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING | ||
229 | * and then make a vmap() ourselves. | ||
230 | */ | ||
231 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
232 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
233 | |||
234 | /* | ||
225 | * Before the debug print so we see this in the total. But during | 235 | * Before the debug print so we see this in the total. But during |
226 | * cleanup in the fail path this has to be subtracted. | 236 | * cleanup in the fail path this has to be subtracted. |
227 | */ | 237 | */ |
@@ -255,7 +265,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
255 | iova, size, flags); | 265 | iova, size, flags); |
256 | } | 266 | } |
257 | if (err) | 267 | if (err) |
258 | goto fail_free; | 268 | goto fail_free_dma; |
269 | |||
270 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
271 | mem->cpu_va = vmap(mem->priv.pages, | ||
272 | size >> PAGE_SHIFT, | ||
273 | 0, PAGE_KERNEL); | ||
274 | if (!mem->cpu_va) { | ||
275 | err = -ENOMEM; | ||
276 | goto fail_free_sgt; | ||
277 | } | ||
278 | } | ||
259 | 279 | ||
260 | mem->aligned_size = size; | 280 | mem->aligned_size = size; |
261 | mem->aperture = APERTURE_SYSMEM; | 281 | mem->aperture = APERTURE_SYSMEM; |
@@ -265,12 +285,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
265 | 285 | ||
266 | return 0; | 286 | return 0; |
267 | 287 | ||
268 | fail_free: | 288 | fail_free_sgt: |
269 | g->dma_memory_used -= mem->aligned_size; | 289 | nvgpu_free_sgtable(g, &mem->priv.sgt); |
290 | fail_free_dma: | ||
270 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | 291 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); |
271 | mem->cpu_va = NULL; | 292 | mem->cpu_va = NULL; |
272 | mem->priv.sgt = NULL; | 293 | mem->priv.sgt = NULL; |
273 | mem->size = 0; | 294 | mem->size = 0; |
295 | g->dma_memory_used -= mem->aligned_size; | ||
274 | return err; | 296 | return err; |
275 | } | 297 | } |
276 | 298 | ||
@@ -466,6 +488,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | |||
466 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | 488 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && |
467 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | 489 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && |
468 | (mem->cpu_va || mem->priv.pages)) { | 490 | (mem->cpu_va || mem->priv.pages)) { |
491 | /* | ||
492 | * Free side of WAR for bug 2040115. | ||
493 | */ | ||
494 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
495 | vunmap(mem->cpu_va); | ||
496 | |||
469 | if (mem->priv.flags) { | 497 | if (mem->priv.flags) { |
470 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | 498 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); |
471 | 499 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 52348db0..741c86e7 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -1149,6 +1149,12 @@ static int gk20a_probe(struct platform_device *dev) | |||
1149 | if (err) | 1149 | if (err) |
1150 | goto return_err; | 1150 | goto return_err; |
1151 | 1151 | ||
1152 | np = nvgpu_get_node(gk20a); | ||
1153 | if (of_dma_is_coherent(np)) { | ||
1154 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1155 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1156 | } | ||
1157 | |||
1152 | if (nvgpu_platform_is_simulation(gk20a)) | 1158 | if (nvgpu_platform_is_simulation(gk20a)) |
1153 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | 1159 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); |
1154 | 1160 | ||
@@ -1208,12 +1214,6 @@ static int gk20a_probe(struct platform_device *dev) | |||
1208 | 1214 | ||
1209 | gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); | 1215 | gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); |
1210 | 1216 | ||
1211 | np = nvgpu_get_node(gk20a); | ||
1212 | if (of_dma_is_coherent(np)) { | ||
1213 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1214 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1215 | } | ||
1216 | |||
1217 | return 0; | 1217 | return 0; |
1218 | 1218 | ||
1219 | return_err: | 1219 | return_err: |
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index c859520d..69897694 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -45,6 +45,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | |||
45 | return 0; | 45 | return 0; |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * WAR for bug 2040115: we already will always have a coherent vmap() | ||
49 | * for all sysmem buffers. The prot settings are left alone since | ||
50 | * eventually this should be deleted. | ||
51 | */ | ||
52 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
53 | return 0; | ||
54 | |||
55 | /* | ||
48 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that | 56 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that |
49 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make | 57 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make |
50 | * another CPU mapping. | 58 | * another CPU mapping. |
@@ -74,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | |||
74 | return; | 82 | return; |
75 | 83 | ||
76 | /* | 84 | /* |
85 | * WAR for bug 2040115: skip this since the map will be taken care of | ||
86 | * during the free in the DMA API. | ||
87 | */ | ||
88 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
89 | return; | ||
90 | |||
91 | /* | ||
77 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping | 92 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping |
78 | * already made by the DMA API. | 93 | * already made by the DMA API. |
79 | */ | 94 | */ |
@@ -393,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
393 | 408 | ||
394 | /* | 409 | /* |
395 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | 410 | * Re-use the CPU mapping only if the mapping was made by the DMA API. |
411 | * | ||
412 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
413 | * re-use. | ||
396 | */ | 414 | */ |
397 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | 415 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || |
416 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
398 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | 417 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); |
399 | 418 | ||
400 | dest->priv.pages = src->priv.pages + start_page; | 419 | dest->priv.pages = src->priv.pages + start_page; |
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 4ba839c4..973da9ca 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c | |||
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
566 | platform->g = g; | 566 | platform->g = g; |
567 | l->dev = &pdev->dev; | 567 | l->dev = &pdev->dev; |
568 | 568 | ||
569 | np = nvgpu_get_node(g); | ||
570 | if (of_dma_is_coherent(np)) { | ||
571 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
572 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
573 | } | ||
574 | |||
569 | err = pci_enable_device(pdev); | 575 | err = pci_enable_device(pdev); |
570 | if (err) | 576 | if (err) |
571 | return err; | 577 | return err; |
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
644 | 650 | ||
645 | g->mm.has_physical_mode = false; | 651 | g->mm.has_physical_mode = false; |
646 | 652 | ||
647 | np = nvgpu_get_node(g); | ||
648 | |||
649 | if (of_dma_is_coherent(np)) { | ||
650 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
652 | } | ||
653 | |||
654 | return 0; | 653 | return 0; |
655 | } | 654 | } |
656 | 655 | ||