From 5a35a95654d561fce09a3b9abf6b82bb7a29d74b Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Wed, 28 Feb 2018 09:19:19 -0800
Subject: Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working"

Also revert other changes related to IO coherence. These changes may be
the culprit in a recent dev-kernel lockdown.

Bug 2070609

Change-Id: Ida178aef161fadbc6db9512521ea51c702c1564b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1665914
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Srikar Srimath Tirumala <srikars@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/dma.c       | 34 ++------------------
 drivers/gpu/nvgpu/common/linux/module.c    |  8 -----
 drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 51 ++++++++++++++++--------------
 drivers/gpu/nvgpu/common/linux/pci.c       | 17 +++++-----
 drivers/gpu/nvgpu/common/linux/vm.c        |  3 +-
 drivers/gpu/nvgpu/common/mm/gmmu.c         | 16 +---------
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c    | 46 ---------------------------
 7 files changed, 41 insertions(+), 134 deletions(-)

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index 81aebb7d..c13dae8b 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -221,16 +221,6 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
 	void *alloc_ret;
 
-	/*
-	 * WAR for IO coherent chips: the DMA API does not seem to generate
-	 * mappings that work correctly. Unclear why - Bug ID: 2040115.
-	 *
-	 * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
-	 * and then make a vmap() ourselves.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
-
 	/*
 	 * Before the debug print so we see this in the total. But during
 	 * cleanup in the fail path this has to be subtracted.
@@ -265,17 +255,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 					iova, size, flags);
 	}
 	if (err)
-		goto fail_free_dma;
-
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
-		mem->cpu_va = vmap(mem->priv.pages,
-				   size >> PAGE_SHIFT,
-				   0, PAGE_KERNEL);
-		if (!mem->cpu_va) {
-			err = -ENOMEM;
-			goto fail_free_sgt;
-		}
-	}
+		goto fail_free;
 
 	mem->aligned_size = size;
 	mem->aperture = APERTURE_SYSMEM;
@@ -285,14 +265,12 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 
 	return 0;
 
-fail_free_sgt:
-	nvgpu_free_sgtable(g, &mem->priv.sgt);
-fail_free_dma:
+fail_free:
+	g->dma_memory_used -= mem->aligned_size;
 	dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
 	mem->cpu_va = NULL;
 	mem->priv.sgt = NULL;
 	mem->size = 0;
-	g->dma_memory_used -= mem->aligned_size;
 	return err;
 }
 
@@ -488,12 +466,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
 	    !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
 	    (mem->cpu_va || mem->priv.pages)) {
-		/*
-		 * Free side of WAR for bug 2040115.
-		 */
-		if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-			vunmap(mem->cpu_va);
-
 		if (mem->priv.flags) {
 			NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
 
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 741c86e7..b103fcea 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -1108,7 +1107,6 @@ static int gk20a_probe(struct platform_device *dev)
 	struct gk20a *gk20a;
 	int err;
 	struct gk20a_platform *platform = NULL;
-	struct device_node *np;
 
 	if (dev->dev.of_node) {
 		const struct of_device_id *match;
@@ -1149,12 +1147,6 @@ static int gk20a_probe(struct platform_device *dev)
 	if (err)
 		goto return_err;
 
-	np = nvgpu_get_node(gk20a);
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	if (nvgpu_platform_is_simulation(gk20a))
 		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
 
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 69897694..206b83e1 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,24 +34,39 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	switch (aperture) {
+	case APERTURE_SYSMEM:
+		/* iGPUs that ignore the aperture treat sysmem as vidmem */
+		return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
+			? sysmem_mask : vidmem_mask;
+	case APERTURE_VIDMEM:
+		/* for dgpus only */
+		return vidmem_mask;
+	case APERTURE_INVALID:
+		WARN_ON("Bad aperture"); /* always fires; returns 0 below */
+	}
+	return 0;
+}
+
+u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	return __nvgpu_aperture_mask(g, mem->aperture,
+			sysmem_mask, vidmem_mask);
+}
+
 int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	void *cpu_va;
-	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
-		PAGE_KERNEL :
+	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
 		pgprot_writecombine(PAGE_KERNEL);
 
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return 0;
 
-	/*
-	 * WAR for bug 2040115: we already will always have a coherent vmap()
-	 * for all sysmem buffers. The prot settings are left alone since
-	 * eventually this should be deleted.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		return 0;
-
 	/*
 	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
 	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
@@ -81,13 +96,6 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return;
 
-	/*
-	 * WAR for bug 2040115: skip this since the map will be taken care of
-	 * during the free in the DMA API.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		return;
-
 	/*
 	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
 	 * already made by the DMA API.
@@ -307,8 +315,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
  */
 u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
 {
-	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
-	    !nvgpu_iommuable(g))
+	if (!nvgpu_iommuable(g))
 		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
 
 	if (sg_dma_address(sgl) == 0)
@@ -408,12 +415,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
 
 	/*
 	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
-	 *
-	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
-	 * re-use.
 	 */
-	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
-	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
 		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
 
 	dest->priv.pages = src->priv.pages + start_page;
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 973da9ca..6ebe8dda 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
 
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/nvlink.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
 
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
@@ -566,12 +566,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	platform->g = g;
 	l->dev = &pdev->dev;
 
-	np = nvgpu_get_node(g);
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
@@ -650,6 +644,13 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 
 	g->mm.has_physical_mode = false;
 
+	np = nvgpu_get_node(g);
+
+	if (of_dma_is_coherent(np)) {
+		__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
+		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 52b2f30c..e3ca4eda 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,8 +166,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
 		  mapped_buffer->flags,
-		  nvgpu_aperture_str(g,
-				     gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
 
 	return mapped_buffer;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41343718..ffac324c 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,13 +79,6 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	if (!sgt)
 		return -ENOMEM;
 
-	/*
-	 * If the GPU is IO coherent and the DMA API is giving us IO coherent
-	 * CPU mappings then we gotta make sure we use the IO coherent aperture.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		flags |= NVGPU_VM_MAP_IO_COHERENT;
-
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
 				   sgt,    /* sg list */
@@ -634,7 +627,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
 		   attrs->kind_v,
-		   nvgpu_aperture_str(g, attrs->aperture),
+		   nvgpu_aperture_str(attrs->aperture),
 		   attrs->cacheable ? 'C' : '-',
 		   attrs->sparse    ? 'S' : '-',
 		   attrs->priv      ? 'P' : '-',
@@ -711,13 +704,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 
 	attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
 
-	/*
-	 * Handle the IO coherency aperture: make sure the .aperture field is
-	 * correct based on the IO coherency flag.
-	 */
-	if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
-		attrs.aperture = __APERTURE_SYSMEM_COH;
-
 	/*
 	 * Only allocate a new GPU VA range if we haven't already been passed a
 	 * GPU VA range. This facilitates fixed mappings.
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 78a57b4e..73b6b2a7 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,52 +28,6 @@
 
 #include "gk20a/gk20a.h"
 
-/*
- * Make sure to use the right coherency aperture if you use this function! This
- * will not add any checks. If you want to simply use the default coherency then
- * use nvgpu_aperture_mask().
- */
-u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
-			  u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
-{
-	/*
-	 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
-	 * "sysmem" aperture should really be translated to VIDMEM.
-	 */
-	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
-		aperture = APERTURE_VIDMEM;
-
-	switch (aperture) {
-	case __APERTURE_SYSMEM_COH:
-		return sysmem_coh_mask;
-	case APERTURE_SYSMEM:
-		return sysmem_mask;
-	case APERTURE_VIDMEM:
-		return vidmem_mask;
-	case APERTURE_INVALID:
-		WARN_ON("Bad aperture");
-	}
-	return 0;
-}
-
-u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
-			u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
-{
-	enum nvgpu_aperture ap = mem->aperture;
-
-	/*
-	 * Handle the coherent aperture: ideally most of the driver is not
-	 * aware of the difference between coherent and non-coherent sysmem so
-	 * we add this translation step here.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
-	    ap == APERTURE_SYSMEM)
-		ap = __APERTURE_SYSMEM_COH;
-
-	return __nvgpu_aperture_mask(g, ap,
-				     sysmem_mask, sysmem_coh_mask, vidmem_mask);
-}
-
 void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
 {
 	return sgt->ops->sgl_next(sgl);
-- 
cgit v1.2.2