gpu: nvgpu: Enable IO coherency on GV100

This reverts commit 848af2ce6de6140323a6ffe3075bf8021e119434. This is a revert of a revert, etc, etc. It re-enables IO coherence again. JIRA EVLR-2333 Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1669722 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2018-03-06 13:43:16 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-07 21:04:41 -0500
commit: 418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch)
tree: 17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common/mm
parent: f85a0d3e00b53453f3d5ca556f15465078473f31 (diff)
2 files changed, 62 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index e1942cbd..3b57e781 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
        if (!sgt)
                return -ENOMEM;
+        /*
+         * If the GPU is IO coherent and the DMA API is giving us IO coherent
+         * CPU mappings then we gotta make sure we use the IO coherent aperture.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                flags |= NVGPU_VM_MAP_IO_COHERENT;
        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        vaddr = g->ops.mm.gmmu_map(vm, addr,
                                   sgt,    /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                   page_size >> 10,
                   nvgpu_gmmu_perm_str(attrs->rw_flag),
                   attrs->kind_v,
-                   nvgpu_aperture_str(attrs->aperture),
+                   nvgpu_aperture_str(g, attrs->aperture),
                   attrs->cacheable ? 'C' : '-',
                   attrs->sparse    ? 'S' : '-',
                   attrs->priv      ? 'P' : '-',
@@ -705,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
        attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
        /*
+         * Handle the IO coherency aperture: make sure the .aperture field is
+         * correct based on the IO coherency flag.
+         */
+        if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
+                attrs.aperture = __APERTURE_SYSMEM_COH;
+        /*
         * Only allocate a new GPU VA range if we haven't already been passed a
         * GPU VA range. This facilitates fixed mappings.
         */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index f7c51f42..2b32d869 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,53 @@
 #include "gk20a/gk20a.h"
+/*
+ * Make sure to use the right coherency aperture if you use this function! This
+ * will not add any checks. If you want to simply use the default coherency then
+ * use nvgpu_aperture_mask().
+ */
+u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
+                          u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+        /*
+         * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
+         * "sysmem" aperture should really be translated to VIDMEM.
+         */
+        if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
+                aperture = APERTURE_VIDMEM;
+        switch (aperture) {
+        case __APERTURE_SYSMEM_COH:
+                return sysmem_coh_mask;
+        case APERTURE_SYSMEM:
+                return sysmem_mask;
+        case APERTURE_VIDMEM:
+                return vidmem_mask;
+        case APERTURE_INVALID:
+                WARN_ON("Bad aperture");
+        }
+        return 0;
+}
+u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
+                        u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+        enum nvgpu_aperture ap = mem->aperture;
+        /*
+         * Handle the coherent aperture: ideally most of the driver is not
+         * aware of the difference between coherent and non-coherent sysmem so
+         * we add this translation step here.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
+            ap == APERTURE_SYSMEM)
+                ap = __APERTURE_SYSMEM_COH;
+        return __nvgpu_aperture_mask(g, ap,
+                                     sysmem_mask, sysmem_coh_mask, vidmem_mask);
+}
 struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
                                     struct nvgpu_sgl *sgl)
 {
author	Alex Waterman <alexw@nvidia.com>	2018-03-06 13:43:16 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-03-07 21:04:41 -0500
commit	418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch)
tree	17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common/mm
parent	f85a0d3e00b53453f3d5ca556f15465078473f31 (diff)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index e1942cbd..3b57e781 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
79	if (!sgt)	79	if (!sgt)
80	return -ENOMEM;	80	return -ENOMEM;
81		81
		82	/*
		83	* If the GPU is IO coherent and the DMA API is giving us IO coherent
		84	* CPU mappings then we gotta make sure we use the IO coherent aperture.
		85	*/
		86	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		87	flags |= NVGPU_VM_MAP_IO_COHERENT;
		88
82	nvgpu_mutex_acquire(&vm->update_gmmu_lock);	89	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
83	vaddr = g->ops.mm.gmmu_map(vm, addr,	90	vaddr = g->ops.mm.gmmu_map(vm, addr,
84	sgt, /* sg list */	91	sgt, /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
627	page_size >> 10,	634	page_size >> 10,
628	nvgpu_gmmu_perm_str(attrs->rw_flag),	635	nvgpu_gmmu_perm_str(attrs->rw_flag),
629	attrs->kind_v,	636	attrs->kind_v,
630	nvgpu_aperture_str(attrs->aperture),	637	nvgpu_aperture_str(g, attrs->aperture),
631	attrs->cacheable ? 'C' : '-',	638	attrs->cacheable ? 'C' : '-',
632	attrs->sparse ? 'S' : '-',	639	attrs->sparse ? 'S' : '-',
633	attrs->priv ? 'P' : '-',	640	attrs->priv ? 'P' : '-',
@@ -705,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
705	attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);	712	attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
706		713
707	/*	714	/*
		715	* Handle the IO coherency aperture: make sure the .aperture field is
		716	* correct based on the IO coherency flag.
		717	*/
		718	if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
		719	attrs.aperture = __APERTURE_SYSMEM_COH;
		720
		721	/*
708	* Only allocate a new GPU VA range if we haven't already been passed a	722	* Only allocate a new GPU VA range if we haven't already been passed a
709	* GPU VA range. This facilitates fixed mappings.	723	* GPU VA range. This facilitates fixed mappings.
710	*/	724	*/


diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index f7c51f42..2b32d869 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,53 @@
28		28
29	#include "gk20a/gk20a.h"	29	#include "gk20a/gk20a.h"
30		30
		31	/*
		32	* Make sure to use the right coherency aperture if you use this function! This
		33	* will not add any checks. If you want to simply use the default coherency then
		34	* use nvgpu_aperture_mask().
		35	*/
		36	u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		37	u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
		38	{
		39	/*
		40	* Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
		41	* "sysmem" aperture should really be translated to VIDMEM.
		42	*/
		43	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
		44	aperture = APERTURE_VIDMEM;
		45
		46	switch (aperture) {
		47	case __APERTURE_SYSMEM_COH:
		48	return sysmem_coh_mask;
		49	case APERTURE_SYSMEM:
		50	return sysmem_mask;
		51	case APERTURE_VIDMEM:
		52	return vidmem_mask;
		53	case APERTURE_INVALID:
		54	WARN_ON("Bad aperture");
		55	}
		56	return 0;
		57	}
		58
		59	u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		60	u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
		61	{
		62	enum nvgpu_aperture ap = mem->aperture;
		63
		64	/*
		65	* Handle the coherent aperture: ideally most of the driver is not
		66	* aware of the difference between coherent and non-coherent sysmem so
		67	* we add this translation step here.
		68	*/
		69	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
		70	ap == APERTURE_SYSMEM)
		71	ap = __APERTURE_SYSMEM_COH;
		72
		73	return __nvgpu_aperture_mask(g, ap,
		74	sysmem_mask, sysmem_coh_mask, vidmem_mask);
		75	}
		76
		77
31	struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,	78	struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
32	struct nvgpu_sgl *sgl)	79	struct nvgpu_sgl *sgl)
33	{	80	{