summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/common
diff options
context:
space:
mode:
author: Alex Waterman <alexw@nvidia.com> 2018-03-06 13:43:16 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-07 21:04:41 -0500
commit: 418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch)
tree: 17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common
parent: f85a0d3e00b53453f3d5ca556f15465078473f31 (diff)
gpu: nvgpu: Enable IO coherency on GV100
This reverts commit 848af2ce6de6140323a6ffe3075bf8021e119434.

This is a revert of a revert, etc., etc. It re-enables IO coherence.

JIRA EVLR-2333

Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1669722
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- drivers/gpu/nvgpu/common/linux/dma.c 34
-rw-r--r-- drivers/gpu/nvgpu/common/linux/module.c 8
-rw-r--r-- drivers/gpu/nvgpu/common/linux/nvgpu_mem.c 51
-rw-r--r-- drivers/gpu/nvgpu/common/linux/pci.c 17
-rw-r--r-- drivers/gpu/nvgpu/common/linux/vm.c 3
-rw-r--r-- drivers/gpu/nvgpu/common/mm/gmmu.c 16
-rw-r--r-- drivers/gpu/nvgpu/common/mm/nvgpu_mem.c 47
7 files changed, 135 insertions, 41 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index c111d22b..53e54bc6 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -227,6 +227,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
227 } 227 }
228 228
229 /* 229 /*
230 * WAR for IO coherent chips: the DMA API does not seem to generate
231 * mappings that work correctly. Unclear why - Bug ID: 2040115.
232 *
233 * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
234 * and then make a vmap() ourselves.
235 */
236 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
237 flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
238
239 /*
230 * Before the debug print so we see this in the total. But during 240 * Before the debug print so we see this in the total. But during
231 * cleanup in the fail path this has to be subtracted. 241 * cleanup in the fail path this has to be subtracted.
232 */ 242 */
@@ -260,7 +270,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
260 iova, size, flags); 270 iova, size, flags);
261 } 271 }
262 if (err) 272 if (err)
263 goto fail_free; 273 goto fail_free_dma;
274
275 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
276 mem->cpu_va = vmap(mem->priv.pages,
277 size >> PAGE_SHIFT,
278 0, PAGE_KERNEL);
279 if (!mem->cpu_va) {
280 err = -ENOMEM;
281 goto fail_free_sgt;
282 }
283 }
264 284
265 mem->aligned_size = size; 285 mem->aligned_size = size;
266 mem->aperture = APERTURE_SYSMEM; 286 mem->aperture = APERTURE_SYSMEM;
@@ -270,12 +290,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
270 290
271 return 0; 291 return 0;
272 292
273fail_free: 293fail_free_sgt:
274 g->dma_memory_used -= mem->aligned_size; 294 nvgpu_free_sgtable(g, &mem->priv.sgt);
295fail_free_dma:
275 dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); 296 dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
276 mem->cpu_va = NULL; 297 mem->cpu_va = NULL;
277 mem->priv.sgt = NULL; 298 mem->priv.sgt = NULL;
278 mem->size = 0; 299 mem->size = 0;
300 g->dma_memory_used -= mem->aligned_size;
279 return err; 301 return err;
280} 302}
281 303
@@ -476,6 +498,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
476 if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && 498 if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
477 !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && 499 !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
478 (mem->cpu_va || mem->priv.pages)) { 500 (mem->cpu_va || mem->priv.pages)) {
501 /*
502 * Free side of WAR for bug 2040115.
503 */
504 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
505 vunmap(mem->cpu_va);
506
479 if (mem->priv.flags) { 507 if (mem->priv.flags) {
480 NVGPU_DEFINE_DMA_ATTRS(dma_attrs); 508 NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
481 509
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 59691c64..92e47e2d 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,6 +20,7 @@
20#include <linux/of.h> 20#include <linux/of.h>
21#include <linux/of_device.h> 21#include <linux/of_device.h>
22#include <linux/of_platform.h> 22#include <linux/of_platform.h>
23#include <linux/of_address.h>
23#include <linux/interrupt.h> 24#include <linux/interrupt.h>
24#include <linux/pm_runtime.h> 25#include <linux/pm_runtime.h>
25#include <linux/reset.h> 26#include <linux/reset.h>
@@ -1111,6 +1112,7 @@ static int gk20a_probe(struct platform_device *dev)
1111 struct gk20a *gk20a; 1112 struct gk20a *gk20a;
1112 int err; 1113 int err;
1113 struct gk20a_platform *platform = NULL; 1114 struct gk20a_platform *platform = NULL;
1115 struct device_node *np;
1114 1116
1115 if (dev->dev.of_node) { 1117 if (dev->dev.of_node) {
1116 const struct of_device_id *match; 1118 const struct of_device_id *match;
@@ -1151,6 +1153,12 @@ static int gk20a_probe(struct platform_device *dev)
1151 if (err) 1153 if (err)
1152 goto return_err; 1154 goto return_err;
1153 1155
1156 np = nvgpu_get_node(gk20a);
1157 if (of_dma_is_coherent(np)) {
1158 __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
1159 __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
1160 }
1161
1154 if (nvgpu_platform_is_simulation(gk20a)) 1162 if (nvgpu_platform_is_simulation(gk20a))
1155 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); 1163 __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
1156 1164
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 7406c4d7..d4549e1b 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,40 +34,25 @@
34#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h" 35#include "gk20a/mm_gk20a.h"
36 36
37u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
38 u32 sysmem_mask, u32 vidmem_mask)
39{
40 switch (aperture) {
41 case APERTURE_SYSMEM:
42 /* some igpus consider system memory vidmem */
43 return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
44 ? sysmem_mask : vidmem_mask;
45 case APERTURE_VIDMEM:
46 /* for dgpus only */
47 return vidmem_mask;
48 case APERTURE_INVALID:
49 WARN_ON("Bad aperture");
50 }
51 return 0;
52}
53
54u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
55 u32 sysmem_mask, u32 vidmem_mask)
56{
57 return __nvgpu_aperture_mask(g, mem->aperture,
58 sysmem_mask, vidmem_mask);
59}
60
61int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) 37int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
62{ 38{
63 void *cpu_va; 39 void *cpu_va;
64 pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : 40 pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
41 PAGE_KERNEL :
65 pgprot_writecombine(PAGE_KERNEL); 42 pgprot_writecombine(PAGE_KERNEL);
66 43
67 if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) 44 if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
68 return 0; 45 return 0;
69 46
70 /* 47 /*
48 * WAR for bug 2040115: we already will always have a coherent vmap()
49 * for all sysmem buffers. The prot settings are left alone since
50 * eventually this should be deleted.
51 */
52 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
53 return 0;
54
55 /*
71 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that 56 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
72 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make 57 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
73 * another CPU mapping. 58 * another CPU mapping.
@@ -97,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
97 return; 82 return;
98 83
99 /* 84 /*
85 * WAR for bug 2040115: skip this since the map will be taken care of
86 * during the free in the DMA API.
87 */
88 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
89 return;
90
91 /*
100 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping 92 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
101 * already made by the DMA API. 93 * already made by the DMA API.
102 */ 94 */
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
315 */ 307 */
316u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) 308u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
317{ 309{
318 if (!nvgpu_iommuable(g)) 310 if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
311 !nvgpu_iommuable(g))
319 return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); 312 return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
320 313
321 if (sg_dma_address(sgl) == 0) 314 if (sg_dma_address(sgl) == 0)
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
415 408
416 /* 409 /*
417 * Re-use the CPU mapping only if the mapping was made by the DMA API. 410 * Re-use the CPU mapping only if the mapping was made by the DMA API.
411 *
412 * Bug 2040115: the DMA API wrapper makes the mapping that we should
413 * re-use.
418 */ 414 */
419 if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) 415 if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
416 nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
420 dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); 417 dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
421 418
422 dest->priv.pages = src->priv.pages + start_page; 419 dest->priv.pages = src->priv.pages + start_page;
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 6ebe8dda..973da9ca 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@
17#include <linux/pci.h> 17#include <linux/pci.h>
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/pm_runtime.h> 19#include <linux/pm_runtime.h>
20#include <linux/of_platform.h>
21#include <linux/of_address.h>
20 22
21#include <nvgpu/nvgpu_common.h> 23#include <nvgpu/nvgpu_common.h>
22#include <nvgpu/kmem.h> 24#include <nvgpu/kmem.h>
23#include <nvgpu/enabled.h> 25#include <nvgpu/enabled.h>
24#include <nvgpu/nvlink.h> 26#include <nvgpu/nvlink.h>
25#include <linux/of_platform.h>
26#include <linux/of_address.h>
27 27
28#include "gk20a/gk20a.h" 28#include "gk20a/gk20a.h"
29#include "clk/clk.h" 29#include "clk/clk.h"
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
566 platform->g = g; 566 platform->g = g;
567 l->dev = &pdev->dev; 567 l->dev = &pdev->dev;
568 568
569 np = nvgpu_get_node(g);
570 if (of_dma_is_coherent(np)) {
571 __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
572 __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
573 }
574
569 err = pci_enable_device(pdev); 575 err = pci_enable_device(pdev);
570 if (err) 576 if (err)
571 return err; 577 return err;
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
644 650
645 g->mm.has_physical_mode = false; 651 g->mm.has_physical_mode = false;
646 652
647 np = nvgpu_get_node(g);
648
649 if (of_dma_is_coherent(np)) {
650 __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
651 __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
652 }
653
654 return 0; 653 return 0;
655} 654}
656 655
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index e3ca4eda..52b2f30c 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, 166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
167 vm_aspace_id(vm), 167 vm_aspace_id(vm),
168 mapped_buffer->flags, 168 mapped_buffer->flags,
169 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); 169 nvgpu_aperture_str(g,
170 gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
170 171
171 return mapped_buffer; 172 return mapped_buffer;
172} 173}
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index e1942cbd..3b57e781 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
79 if (!sgt) 79 if (!sgt)
80 return -ENOMEM; 80 return -ENOMEM;
81 81
82 /*
83 * If the GPU is IO coherent and the DMA API is giving us IO coherent
84 * CPU mappings then we gotta make sure we use the IO coherent aperture.
85 */
86 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
87 flags |= NVGPU_VM_MAP_IO_COHERENT;
88
82 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 89 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
83 vaddr = g->ops.mm.gmmu_map(vm, addr, 90 vaddr = g->ops.mm.gmmu_map(vm, addr,
84 sgt, /* sg list */ 91 sgt, /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
627 page_size >> 10, 634 page_size >> 10,
628 nvgpu_gmmu_perm_str(attrs->rw_flag), 635 nvgpu_gmmu_perm_str(attrs->rw_flag),
629 attrs->kind_v, 636 attrs->kind_v,
630 nvgpu_aperture_str(attrs->aperture), 637 nvgpu_aperture_str(g, attrs->aperture),
631 attrs->cacheable ? 'C' : '-', 638 attrs->cacheable ? 'C' : '-',
632 attrs->sparse ? 'S' : '-', 639 attrs->sparse ? 'S' : '-',
633 attrs->priv ? 'P' : '-', 640 attrs->priv ? 'P' : '-',
@@ -705,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
705 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); 712 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
706 713
707 /* 714 /*
715 * Handle the IO coherency aperture: make sure the .aperture field is
716 * correct based on the IO coherency flag.
717 */
718 if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
719 attrs.aperture = __APERTURE_SYSMEM_COH;
720
721 /*
708 * Only allocate a new GPU VA range if we haven't already been passed a 722 * Only allocate a new GPU VA range if we haven't already been passed a
709 * GPU VA range. This facilitates fixed mappings. 723 * GPU VA range. This facilitates fixed mappings.
710 */ 724 */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index f7c51f42..2b32d869 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,53 @@
28 28
29#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
30 30
31/*
32 * Make sure to use the right coherency aperture if you use this function! This
33 * will not add any checks. If you want to simply use the default coherency then
34 * use nvgpu_aperture_mask().
35 */
36u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
37 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
38{
39 /*
40 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
41 * "sysmem" aperture should really be translated to VIDMEM.
42 */
43 if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
44 aperture = APERTURE_VIDMEM;
45
46 switch (aperture) {
47 case __APERTURE_SYSMEM_COH:
48 return sysmem_coh_mask;
49 case APERTURE_SYSMEM:
50 return sysmem_mask;
51 case APERTURE_VIDMEM:
52 return vidmem_mask;
53 case APERTURE_INVALID:
54 WARN_ON("Bad aperture");
55 }
56 return 0;
57}
58
59u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
60 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
61{
62 enum nvgpu_aperture ap = mem->aperture;
63
64 /*
65 * Handle the coherent aperture: ideally most of the driver is not
66 * aware of the difference between coherent and non-coherent sysmem so
67 * we add this translation step here.
68 */
69 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
70 ap == APERTURE_SYSMEM)
71 ap = __APERTURE_SYSMEM_COH;
72
73 return __nvgpu_aperture_mask(g, ap,
74 sysmem_mask, sysmem_coh_mask, vidmem_mask);
75}
76
77
31struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, 78struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
32 struct nvgpu_sgl *sgl) 79 struct nvgpu_sgl *sgl)
33{ 80{