diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-02-28 12:19:19 -0500 |
---|---|---|
committer | Srikar Srimath Tirumala <srikars@nvidia.com> | 2018-02-28 16:49:22 -0500 |
commit | 5a35a95654d561fce09a3b9abf6b82bb7a29d74b (patch) | |
tree | 119a07134188d8e06c29a570dd8c6b143f39c9e1 /drivers/gpu/nvgpu/common/linux | |
parent | 3fdd8e38b280123fd13bcc4f3fd8928c15e94db6 (diff) |
Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working"

Also revert other changes related to IO coherence. This may be the
culprit in a recent dev-kernel lockdown.

Bug 2070609

Change-Id: Ida178aef161fadbc6db9512521ea51c702c1564b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1665914
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Srikar Srimath Tirumala <srikars@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dma.c | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 51 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/pci.c | 17 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 3 |
5 files changed, 40 insertions, 73 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index 81aebb7d..c13dae8b 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -222,16 +222,6 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
222 | void *alloc_ret; | 222 | void *alloc_ret; |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
226 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
227 | * | ||
228 | * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING | ||
229 | * and then make a vmap() ourselves. | ||
230 | */ | ||
231 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
232 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
233 | |||
234 | /* | ||
235 | * Before the debug print so we see this in the total. But during | 225 | * Before the debug print so we see this in the total. But during |
236 | * cleanup in the fail path this has to be subtracted. | 226 | * cleanup in the fail path this has to be subtracted. |
237 | */ | 227 | */ |
@@ -265,17 +255,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
265 | iova, size, flags); | 255 | iova, size, flags); |
266 | } | 256 | } |
267 | if (err) | 257 | if (err) |
268 | goto fail_free_dma; | 258 | goto fail_free; |
269 | |||
270 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
271 | mem->cpu_va = vmap(mem->priv.pages, | ||
272 | size >> PAGE_SHIFT, | ||
273 | 0, PAGE_KERNEL); | ||
274 | if (!mem->cpu_va) { | ||
275 | err = -ENOMEM; | ||
276 | goto fail_free_sgt; | ||
277 | } | ||
278 | } | ||
279 | 259 | ||
280 | mem->aligned_size = size; | 260 | mem->aligned_size = size; |
281 | mem->aperture = APERTURE_SYSMEM; | 261 | mem->aperture = APERTURE_SYSMEM; |
@@ -285,14 +265,12 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
285 | 265 | ||
286 | return 0; | 266 | return 0; |
287 | 267 | ||
288 | fail_free_sgt: | 268 | fail_free: |
289 | nvgpu_free_sgtable(g, &mem->priv.sgt); | 269 | g->dma_memory_used -= mem->aligned_size; |
290 | fail_free_dma: | ||
291 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | 270 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); |
292 | mem->cpu_va = NULL; | 271 | mem->cpu_va = NULL; |
293 | mem->priv.sgt = NULL; | 272 | mem->priv.sgt = NULL; |
294 | mem->size = 0; | 273 | mem->size = 0; |
295 | g->dma_memory_used -= mem->aligned_size; | ||
296 | return err; | 274 | return err; |
297 | } | 275 | } |
298 | 276 | ||
@@ -488,12 +466,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | |||
488 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | 466 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && |
489 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | 467 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && |
490 | (mem->cpu_va || mem->priv.pages)) { | 468 | (mem->cpu_va || mem->priv.pages)) { |
491 | /* | ||
492 | * Free side of WAR for bug 2040115. | ||
493 | */ | ||
494 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
495 | vunmap(mem->cpu_va); | ||
496 | |||
497 | if (mem->priv.flags) { | 469 | if (mem->priv.flags) { |
498 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | 470 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); |
499 | 471 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 741c86e7..b103fcea 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/of.h> | 20 | #include <linux/of.h> |
21 | #include <linux/of_device.h> | 21 | #include <linux/of_device.h> |
22 | #include <linux/of_platform.h> | 22 | #include <linux/of_platform.h> |
23 | #include <linux/of_address.h> | ||
24 | #include <linux/interrupt.h> | 23 | #include <linux/interrupt.h> |
25 | #include <linux/pm_runtime.h> | 24 | #include <linux/pm_runtime.h> |
26 | #include <linux/reset.h> | 25 | #include <linux/reset.h> |
@@ -1108,7 +1107,6 @@ static int gk20a_probe(struct platform_device *dev) | |||
1108 | struct gk20a *gk20a; | 1107 | struct gk20a *gk20a; |
1109 | int err; | 1108 | int err; |
1110 | struct gk20a_platform *platform = NULL; | 1109 | struct gk20a_platform *platform = NULL; |
1111 | struct device_node *np; | ||
1112 | 1110 | ||
1113 | if (dev->dev.of_node) { | 1111 | if (dev->dev.of_node) { |
1114 | const struct of_device_id *match; | 1112 | const struct of_device_id *match; |
@@ -1149,12 +1147,6 @@ static int gk20a_probe(struct platform_device *dev) | |||
1149 | if (err) | 1147 | if (err) |
1150 | goto return_err; | 1148 | goto return_err; |
1151 | 1149 | ||
1152 | np = nvgpu_get_node(gk20a); | ||
1153 | if (of_dma_is_coherent(np)) { | ||
1154 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1155 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1156 | } | ||
1157 | |||
1158 | if (nvgpu_platform_is_simulation(gk20a)) | 1150 | if (nvgpu_platform_is_simulation(gk20a)) |
1159 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | 1151 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); |
1160 | 1152 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 69897694..206b83e1 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,25 +34,40 @@ | |||
34 | #include "gk20a/gk20a.h" | 34 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 35 | #include "gk20a/mm_gk20a.h" |
36 | 36 | ||
37 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
38 | u32 sysmem_mask, u32 vidmem_mask) | ||
39 | { | ||
40 | switch (aperture) { | ||
41 | case APERTURE_SYSMEM: | ||
42 | /* some igpus consider system memory vidmem */ | ||
43 | return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) | ||
44 | ? sysmem_mask : vidmem_mask; | ||
45 | case APERTURE_VIDMEM: | ||
46 | /* for dgpus only */ | ||
47 | return vidmem_mask; | ||
48 | case APERTURE_INVALID: | ||
49 | WARN_ON("Bad aperture"); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
55 | u32 sysmem_mask, u32 vidmem_mask) | ||
56 | { | ||
57 | return __nvgpu_aperture_mask(g, mem->aperture, | ||
58 | sysmem_mask, vidmem_mask); | ||
59 | } | ||
60 | |||
37 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | 61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) |
38 | { | 62 | { |
39 | void *cpu_va; | 63 | void *cpu_va; |
40 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? | 64 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : |
41 | PAGE_KERNEL : | ||
42 | pgprot_writecombine(PAGE_KERNEL); | 65 | pgprot_writecombine(PAGE_KERNEL); |
43 | 66 | ||
44 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 67 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
45 | return 0; | 68 | return 0; |
46 | 69 | ||
47 | /* | 70 | /* |
48 | * WAR for bug 2040115: we already will always have a coherent vmap() | ||
49 | * for all sysmem buffers. The prot settings are left alone since | ||
50 | * eventually this should be deleted. | ||
51 | */ | ||
52 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
53 | return 0; | ||
54 | |||
55 | /* | ||
56 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that | 71 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that |
57 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make | 72 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make |
58 | * another CPU mapping. | 73 | * another CPU mapping. |
@@ -82,13 +97,6 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | |||
82 | return; | 97 | return; |
83 | 98 | ||
84 | /* | 99 | /* |
85 | * WAR for bug 2040115: skip this since the map will be taken care of | ||
86 | * during the free in the DMA API. | ||
87 | */ | ||
88 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
89 | return; | ||
90 | |||
91 | /* | ||
92 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping | 100 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping |
93 | * already made by the DMA API. | 101 | * already made by the DMA API. |
94 | */ | 102 | */ |
@@ -307,8 +315,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, | |||
307 | */ | 315 | */ |
308 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | 316 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) |
309 | { | 317 | { |
310 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || | 318 | if (!nvgpu_iommuable(g)) |
311 | !nvgpu_iommuable(g)) | ||
312 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); | 319 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); |
313 | 320 | ||
314 | if (sg_dma_address(sgl) == 0) | 321 | if (sg_dma_address(sgl) == 0) |
@@ -408,12 +415,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
408 | 415 | ||
409 | /* | 416 | /* |
410 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | 417 | * Re-use the CPU mapping only if the mapping was made by the DMA API. |
411 | * | ||
412 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
413 | * re-use. | ||
414 | */ | 418 | */ |
415 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || | 419 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) |
416 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
417 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | 420 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); |
418 | 421 | ||
419 | dest->priv.pages = src->priv.pages + start_page; | 422 | dest->priv.pages = src->priv.pages + start_page; |
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 973da9ca..6ebe8dda 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@ | |||
17 | #include <linux/pci.h> | 17 | #include <linux/pci.h> |
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/pm_runtime.h> | 19 | #include <linux/pm_runtime.h> |
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
22 | 20 | ||
23 | #include <nvgpu/nvgpu_common.h> | 21 | #include <nvgpu/nvgpu_common.h> |
24 | #include <nvgpu/kmem.h> | 22 | #include <nvgpu/kmem.h> |
25 | #include <nvgpu/enabled.h> | 23 | #include <nvgpu/enabled.h> |
26 | #include <nvgpu/nvlink.h> | 24 | #include <nvgpu/nvlink.h> |
25 | #include <linux/of_platform.h> | ||
26 | #include <linux/of_address.h> | ||
27 | 27 | ||
28 | #include "gk20a/gk20a.h" | 28 | #include "gk20a/gk20a.h" |
29 | #include "clk/clk.h" | 29 | #include "clk/clk.h" |
@@ -566,12 +566,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
566 | platform->g = g; | 566 | platform->g = g; |
567 | l->dev = &pdev->dev; | 567 | l->dev = &pdev->dev; |
568 | 568 | ||
569 | np = nvgpu_get_node(g); | ||
570 | if (of_dma_is_coherent(np)) { | ||
571 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
572 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
573 | } | ||
574 | |||
575 | err = pci_enable_device(pdev); | 569 | err = pci_enable_device(pdev); |
576 | if (err) | 570 | if (err) |
577 | return err; | 571 | return err; |
@@ -650,6 +644,13 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
650 | 644 | ||
651 | g->mm.has_physical_mode = false; | 645 | g->mm.has_physical_mode = false; |
652 | 646 | ||
647 | np = nvgpu_get_node(g); | ||
648 | |||
649 | if (of_dma_is_coherent(np)) { | ||
650 | __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true); | ||
651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
652 | } | ||
653 | |||
653 | return 0; | 654 | return 0; |
654 | } | 655 | } |
655 | 656 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 52b2f30c..e3ca4eda 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,8 +166,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
167 | vm_aspace_id(vm), | 167 | vm_aspace_id(vm), |
168 | mapped_buffer->flags, | 168 | mapped_buffer->flags, |
169 | nvgpu_aperture_str(g, | 169 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); |
170 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
171 | 170 | ||
172 | return mapped_buffer; | 171 | return mapped_buffer; |
173 | } | 172 | } |