From 418f31cd91a5c3ca45f0920ed64205def49c8a80 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Tue, 6 Mar 2018 10:43:16 -0800
Subject: gpu: nvgpu: Enable IO coherency on GV100

This reverts commit 848af2ce6de6140323a6ffe3075bf8021e119434.

This is a revert of a revert, etc., etc. It re-enables IO coherency once again.

JIRA EVLR-2333

Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1669722
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/mm/gmmu.c      | 16 ++++++++++-
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 47 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/nvgpu/common/mm')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index e1942cbd..3b57e781 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	if (!sgt)
 		return -ENOMEM;
 
+	/*
+	 * If the GPU is IO coherent and the DMA API is giving us IO coherent
+	 * CPU mappings then we must make sure we use the IO coherent aperture.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+		flags |= NVGPU_VM_MAP_IO_COHERENT;
+
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
 				   sgt,    /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
 		   attrs->kind_v,
-		   nvgpu_aperture_str(attrs->aperture),
+		   nvgpu_aperture_str(g, attrs->aperture),
 		   attrs->cacheable ? 'C' : '-',
 		   attrs->sparse    ? 'S' : '-',
 		   attrs->priv      ? 'P' : '-',
@@ -704,6 +711,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 
 	attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
 
+	/*
+	 * Handle the IO coherency aperture: make sure the .aperture field is
+	 * correct based on the IO coherency flag.
+	 */
+	if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
+		attrs.aperture = __APERTURE_SYSMEM_COH;
+
 	/*
 	 * Only allocate a new GPU VA range if we haven't already been passed a
 	 * GPU VA range. This facilitates fixed mappings.
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index f7c51f42..2b32d869 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,53 @@
 
 #include "gk20a/gk20a.h"
 
+/*
+ * Pick the mask for @aperture (0 plus a warning for APERTURE_INVALID). No
+ * coherency translation is done here, so pass the right coherency aperture;
+ * to simply use the default coherency call nvgpu_aperture_mask() instead.
+ */
+u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
+			  u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+	/*
+	 * Some iGPUs treat sysmem (i.e. SoC DRAM) as vidmem. If the aperture
+	 * is not honored then every aperture is translated to VIDMEM.
+	 */
+	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
+		aperture = APERTURE_VIDMEM;
+
+	switch (aperture) {
+	case __APERTURE_SYSMEM_COH:
+		return sysmem_coh_mask;
+	case APERTURE_SYSMEM:
+		return sysmem_mask;
+	case APERTURE_VIDMEM:
+		return vidmem_mask;
+	case APERTURE_INVALID:
+		WARN(1, "Bad aperture");
+	}
+	return 0;
+}
+
+/* Return the mask for mem->aperture, using the default sysmem coherency. */
+u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
+			u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+	enum nvgpu_aperture ap = mem->aperture;
+
+	/*
+	 * Most of the driver is unaware of coherent vs non-coherent sysmem, so
+	 * translate SYSMEM to the coherent aperture here when it is enabled.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
+	    ap == APERTURE_SYSMEM)
+		ap = __APERTURE_SYSMEM_COH;
+
+	return __nvgpu_aperture_mask(g, ap,
+				     sysmem_mask, sysmem_coh_mask, vidmem_mask);
+}
+
+
 struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
 				     struct nvgpu_sgl *sgl)
 {
-- 
cgit v1.2.2