diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-03-06 13:43:16 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-07 21:04:41 -0500 |
commit | 418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch) | |
tree | 17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common/linux | |
parent | f85a0d3e00b53453f3d5ca556f15465078473f31 (diff) |
gpu: nvgpu: Enable IO coherency on GV100
This reverts commit 848af2ce6de6140323a6ffe3075bf8021e119434.
This is a revert of a revert: it re-enables IO coherence.
JIRA EVLR-2333
Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1669722
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dma.c | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 51 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/pci.c | 17 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 3 |
5 files changed, 73 insertions, 40 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index c111d22b..53e54bc6 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c | |||
@@ -227,6 +227,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
227 | } | 227 | } |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
231 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
232 | * | ||
233 | * Basically we pass NO_KERNEL_MAPPING so that the DMA API does not create | ||
234 | * a kernel mapping, and then make a vmap() ourselves. | ||
235 | */ | ||
236 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
237 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
238 | |||
239 | /* | ||
230 | * Before the debug print so we see this in the total. But during | 240 | * Before the debug print so we see this in the total. But during |
231 | * cleanup in the fail path this has to be subtracted. | 241 | * cleanup in the fail path this has to be subtracted. |
232 | */ | 242 | */ |
@@ -260,7 +270,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
260 | iova, size, flags); | 270 | iova, size, flags); |
261 | } | 271 | } |
262 | if (err) | 272 | if (err) |
263 | goto fail_free; | 273 | goto fail_free_dma; |
274 | |||
275 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
276 | mem->cpu_va = vmap(mem->priv.pages, | ||
277 | size >> PAGE_SHIFT, | ||
278 | 0, PAGE_KERNEL); | ||
279 | if (!mem->cpu_va) { | ||
280 | err = -ENOMEM; | ||
281 | goto fail_free_sgt; | ||
282 | } | ||
283 | } | ||
264 | 284 | ||
265 | mem->aligned_size = size; | 285 | mem->aligned_size = size; |
266 | mem->aperture = APERTURE_SYSMEM; | 286 | mem->aperture = APERTURE_SYSMEM; |
@@ -270,12 +290,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
270 | 290 | ||
271 | return 0; | 291 | return 0; |
272 | 292 | ||
273 | fail_free: | 293 | fail_free_sgt: |
274 | g->dma_memory_used -= mem->aligned_size; | 294 | nvgpu_free_sgtable(g, &mem->priv.sgt); |
295 | fail_free_dma: | ||
275 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | 296 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); |
276 | mem->cpu_va = NULL; | 297 | mem->cpu_va = NULL; |
277 | mem->priv.sgt = NULL; | 298 | mem->priv.sgt = NULL; |
278 | mem->size = 0; | 299 | mem->size = 0; |
300 | g->dma_memory_used -= mem->aligned_size; | ||
279 | return err; | 301 | return err; |
280 | } | 302 | } |
281 | 303 | ||
@@ -476,6 +498,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | |||
476 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | 498 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && |
477 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | 499 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && |
478 | (mem->cpu_va || mem->priv.pages)) { | 500 | (mem->cpu_va || mem->priv.pages)) { |
501 | /* | ||
502 | * Free side of WAR for bug 2040115. | ||
503 | */ | ||
504 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
505 | vunmap(mem->cpu_va); | ||
506 | |||
479 | if (mem->priv.flags) { | 507 | if (mem->priv.flags) { |
480 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | 508 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); |
481 | 509 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 59691c64..92e47e2d 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/of.h> | 20 | #include <linux/of.h> |
21 | #include <linux/of_device.h> | 21 | #include <linux/of_device.h> |
22 | #include <linux/of_platform.h> | 22 | #include <linux/of_platform.h> |
23 | #include <linux/of_address.h> | ||
23 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
24 | #include <linux/pm_runtime.h> | 25 | #include <linux/pm_runtime.h> |
25 | #include <linux/reset.h> | 26 | #include <linux/reset.h> |
@@ -1111,6 +1112,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1111 | struct gk20a *gk20a; | 1112 | struct gk20a *gk20a; |
1112 | int err; | 1113 | int err; |
1113 | struct gk20a_platform *platform = NULL; | 1114 | struct gk20a_platform *platform = NULL; |
1115 | struct device_node *np; | ||
1114 | 1116 | ||
1115 | if (dev->dev.of_node) { | 1117 | if (dev->dev.of_node) { |
1116 | const struct of_device_id *match; | 1118 | const struct of_device_id *match; |
@@ -1151,6 +1153,12 @@ static int gk20a_probe(struct platform_device *dev) | |||
1151 | if (err) | 1153 | if (err) |
1152 | goto return_err; | 1154 | goto return_err; |
1153 | 1155 | ||
1156 | np = nvgpu_get_node(gk20a); | ||
1157 | if (of_dma_is_coherent(np)) { | ||
1158 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1159 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1160 | } | ||
1161 | |||
1154 | if (nvgpu_platform_is_simulation(gk20a)) | 1162 | if (nvgpu_platform_is_simulation(gk20a)) |
1155 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | 1163 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); |
1156 | 1164 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index 7406c4d7..d4549e1b 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -34,40 +34,25 @@ | |||
34 | #include "gk20a/gk20a.h" | 34 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 35 | #include "gk20a/mm_gk20a.h" |
36 | 36 | ||
37 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
38 | u32 sysmem_mask, u32 vidmem_mask) | ||
39 | { | ||
40 | switch (aperture) { | ||
41 | case APERTURE_SYSMEM: | ||
42 | /* some igpus consider system memory vidmem */ | ||
43 | return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) | ||
44 | ? sysmem_mask : vidmem_mask; | ||
45 | case APERTURE_VIDMEM: | ||
46 | /* for dgpus only */ | ||
47 | return vidmem_mask; | ||
48 | case APERTURE_INVALID: | ||
49 | WARN_ON("Bad aperture"); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
55 | u32 sysmem_mask, u32 vidmem_mask) | ||
56 | { | ||
57 | return __nvgpu_aperture_mask(g, mem->aperture, | ||
58 | sysmem_mask, vidmem_mask); | ||
59 | } | ||
60 | |||
61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | 37 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) |
62 | { | 38 | { |
63 | void *cpu_va; | 39 | void *cpu_va; |
64 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : | 40 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
41 | PAGE_KERNEL : | ||
65 | pgprot_writecombine(PAGE_KERNEL); | 42 | pgprot_writecombine(PAGE_KERNEL); |
66 | 43 | ||
67 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 44 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
68 | return 0; | 45 | return 0; |
69 | 46 | ||
70 | /* | 47 | /* |
48 | * WAR for bug 2040115: we already will always have a coherent vmap() | ||
49 | * for all sysmem buffers. The prot settings are left alone since | ||
50 | * eventually this should be deleted. | ||
51 | */ | ||
52 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
53 | return 0; | ||
54 | |||
55 | /* | ||
71 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that | 56 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that |
72 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make | 57 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make |
73 | * another CPU mapping. | 58 | * another CPU mapping. |
@@ -97,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | |||
97 | return; | 82 | return; |
98 | 83 | ||
99 | /* | 84 | /* |
85 | * WAR for bug 2040115: skip this since the map will be taken care of | ||
86 | * during the free in the DMA API. | ||
87 | */ | ||
88 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
89 | return; | ||
90 | |||
91 | /* | ||
100 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping | 92 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping |
101 | * already made by the DMA API. | 93 | * already made by the DMA API. |
102 | */ | 94 | */ |
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, | |||
315 | */ | 307 | */ |
316 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | 308 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) |
317 | { | 309 | { |
318 | if (!nvgpu_iommuable(g)) | 310 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || |
311 | !nvgpu_iommuable(g)) | ||
319 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); | 312 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); |
320 | 313 | ||
321 | if (sg_dma_address(sgl) == 0) | 314 | if (sg_dma_address(sgl) == 0) |
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
415 | 408 | ||
416 | /* | 409 | /* |
417 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | 410 | * Re-use the CPU mapping only if the mapping was made by the DMA API. |
411 | * | ||
412 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
413 | * re-use. | ||
418 | */ | 414 | */ |
419 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | 415 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || |
416 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
420 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | 417 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); |
421 | 418 | ||
422 | dest->priv.pages = src->priv.pages + start_page; | 419 | dest->priv.pages = src->priv.pages + start_page; |
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 6ebe8dda..973da9ca 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c | |||
@@ -17,13 +17,13 @@ | |||
17 | #include <linux/pci.h> | 17 | #include <linux/pci.h> |
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/pm_runtime.h> | 19 | #include <linux/pm_runtime.h> |
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
20 | 22 | ||
21 | #include <nvgpu/nvgpu_common.h> | 23 | #include <nvgpu/nvgpu_common.h> |
22 | #include <nvgpu/kmem.h> | 24 | #include <nvgpu/kmem.h> |
23 | #include <nvgpu/enabled.h> | 25 | #include <nvgpu/enabled.h> |
24 | #include <nvgpu/nvlink.h> | 26 | #include <nvgpu/nvlink.h> |
25 | #include <linux/of_platform.h> | ||
26 | #include <linux/of_address.h> | ||
27 | 27 | ||
28 | #include "gk20a/gk20a.h" | 28 | #include "gk20a/gk20a.h" |
29 | #include "clk/clk.h" | 29 | #include "clk/clk.h" |
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
566 | platform->g = g; | 566 | platform->g = g; |
567 | l->dev = &pdev->dev; | 567 | l->dev = &pdev->dev; |
568 | 568 | ||
569 | np = nvgpu_get_node(g); | ||
570 | if (of_dma_is_coherent(np)) { | ||
571 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
572 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
573 | } | ||
574 | |||
569 | err = pci_enable_device(pdev); | 575 | err = pci_enable_device(pdev); |
570 | if (err) | 576 | if (err) |
571 | return err; | 577 | return err; |
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
644 | 650 | ||
645 | g->mm.has_physical_mode = false; | 651 | g->mm.has_physical_mode = false; |
646 | 652 | ||
647 | np = nvgpu_get_node(g); | ||
648 | |||
649 | if (of_dma_is_coherent(np)) { | ||
650 | __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true); | ||
651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
652 | } | ||
653 | |||
654 | return 0; | 653 | return 0; |
655 | } | 654 | } |
656 | 655 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index e3ca4eda..52b2f30c 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
167 | vm_aspace_id(vm), | 167 | vm_aspace_id(vm), |
168 | mapped_buffer->flags, | 168 | mapped_buffer->flags, |
169 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | 169 | nvgpu_aperture_str(g, |
170 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
170 | 171 | ||
171 | return mapped_buffer; | 172 | return mapped_buffer; |
172 | } | 173 | } |