gpu: nvgpu: Enable IO coherency on GV100

This reverts commit 848af2ce6de6140323a6ffe3075bf8021e119434.

This is a revert of a revert, etc, etc. It re-enables IO coherence again.

JIRA EVLR-2333

Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1669722
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2018-03-06 13:43:16 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-07 21:04:41 -0500
commit: 418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch)
tree: 17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common
parent: f85a0d3e00b53453f3d5ca556f15465078473f31 (diff)
7 files changed, 135 insertions, 41 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index c111d22b..53e54bc6 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -227,6 +227,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
        }
        /*
+         * WAR for IO coherent chips: the DMA API does not seem to generate
+         * mappings that work correctly. Unclear why - Bug ID: 2040115.
+         *
+         * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
+         * and then make a vmap() ourselves.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
+        /*
         * Before the debug print so we see this in the total. But during
         * cleanup in the fail path this has to be subtracted.
         */
@@ -260,7 +270,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
                                        iova, size, flags);
        }
        if (err)
-                goto fail_free;
+                goto fail_free_dma;
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
+                mem->cpu_va = vmap(mem->priv.pages,
+                                   size >> PAGE_SHIFT,
+                                   0, PAGE_KERNEL);
+                if (!mem->cpu_va) {
+                        err = -ENOMEM;
+                        goto fail_free_sgt;
+                }
+        }
        mem->aligned_size = size;
        mem->aperture = APERTURE_SYSMEM;
@@ -270,12 +290,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
        return 0;
-fail_free:
+fail_free_sgt:
-        g->dma_memory_used -= mem->aligned_size;
+        nvgpu_free_sgtable(g, &mem->priv.sgt);
+fail_free_dma:
        dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
        mem->cpu_va = NULL;
        mem->priv.sgt = NULL;
        mem->size = 0;
+        g->dma_memory_used -= mem->aligned_size;
        return err;
 }
@@ -476,6 +498,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
        if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
            !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
            (mem->cpu_va || mem->priv.pages)) {
+                /*
+                 * Free side of WAR for bug 2040115.
+                 */
+                if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                        vunmap(mem->cpu_va);
                if (mem->priv.flags) {
                        NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 59691c64..92e47e2d 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
+#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -1111,6 +1112,7 @@ static int gk20a_probe(struct platform_device *dev)
        struct gk20a *gk20a;
        int err;
        struct gk20a_platform *platform = NULL;
+        struct device_node *np;
        if (dev->dev.of_node) {
                const struct of_device_id *match;
@@ -1151,6 +1153,12 @@ static int gk20a_probe(struct platform_device *dev)
        if (err)
                goto return_err;
+        np = nvgpu_get_node(gk20a);
+        if (of_dma_is_coherent(np)) {
+                __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
+                __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
+        }
        if (nvgpu_platform_is_simulation(gk20a))
                __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 7406c4d7..d4549e1b 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,40 +34,25 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
-                u32 sysmem_mask, u32 vidmem_mask)
-{
-        switch (aperture) {
-        case APERTURE_SYSMEM:
-                /* some igpus consider system memory vidmem */
-                return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
-                        ? sysmem_mask : vidmem_mask;
-        case APERTURE_VIDMEM:
-                /* for dgpus only */
-                return vidmem_mask;
-        case APERTURE_INVALID:
-                WARN_ON("Bad aperture");
-        }
-        return 0;
-}
-u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
-                u32 sysmem_mask, u32 vidmem_mask)
-{
-        return __nvgpu_aperture_mask(g, mem->aperture,
-                        sysmem_mask, vidmem_mask);
-}
 int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
        void *cpu_va;
-        pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
+        pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
+                PAGE_KERNEL :
                pgprot_writecombine(PAGE_KERNEL);
        if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
                return 0;
        /*
+         * WAR for bug 2040115: we already will always have a coherent vmap()
+         * for all sysmem buffers. The prot settings are left alone since
+         * eventually this should be deleted.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                return 0;
+        /*
         * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
         * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
         * another CPU mapping.
@@ -97,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
                return;
        /*
+         * WAR for bug 2040115: skip this since the map will be taken care of
+         * during the free in the DMA API.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                return;
+        /*
         * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
         * already made by the DMA API.
         */
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 */
 u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
 {
-        if (!nvgpu_iommuable(g))
+        if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
+            !nvgpu_iommuable(g))
                return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
        if (sg_dma_address(sgl) == 0)
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
        /*
         * Re-use the CPU mapping only if the mapping was made by the DMA API.
+         *
+         * Bug 2040115: the DMA API wrapper makes the mapping that we should
+         * re-use.
         */
-        if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
+        if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
+            nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
                dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
        dest->priv.pages = src->priv.pages + start_page;
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 6ebe8dda..973da9ca 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/nvlink.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
        platform->g = g;
        l->dev = &pdev->dev;
+        np = nvgpu_get_node(g);
+        if (of_dma_is_coherent(np)) {
+                __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
+                __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
+        }
        err = pci_enable_device(pdev);
        if (err)
                return err;
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
        g->mm.has_physical_mode = false;
-        np = nvgpu_get_node(g);
-        if (of_dma_is_coherent(np)) {
-                __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
-                __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
-        }
        return 0;
 }
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index e3ca4eda..52b2f30c 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
                  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
                  vm_aspace_id(vm),
                  mapped_buffer->flags,
-                  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
+                  nvgpu_aperture_str(g,
+                                     gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
        return mapped_buffer;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index e1942cbd..3b57e781 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
        if (!sgt)
                return -ENOMEM;
+        /*
+         * If the GPU is IO coherent and the DMA API is giving us IO coherent
+         * CPU mappings then we gotta make sure we use the IO coherent aperture.
+         */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+                flags |= NVGPU_VM_MAP_IO_COHERENT;
        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        vaddr = g->ops.mm.gmmu_map(vm, addr,
                                   sgt,    /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                   page_size >> 10,
                   nvgpu_gmmu_perm_str(attrs->rw_flag),
                   attrs->kind_v,
-                   nvgpu_aperture_str(attrs->aperture),
+                   nvgpu_aperture_str(g, attrs->aperture),
                   attrs->cacheable ? 'C' : '-',
                   attrs->sparse    ? 'S' : '-',
                   attrs->priv      ? 'P' : '-',
@@ -705,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
        attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
        /*
+         * Handle the IO coherency aperture: make sure the .aperture field is
+         * correct based on the IO coherency flag.
+         */
+        if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
+                attrs.aperture = __APERTURE_SYSMEM_COH;
+        /*
         * Only allocate a new GPU VA range if we haven't already been passed a
         * GPU VA range. This facilitates fixed mappings.
         */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index f7c51f42..2b32d869 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,53 @@
 #include "gk20a/gk20a.h"
+/*
+ * Translate an aperture into the matching caller-supplied mask.
+ *
+ * Make sure to use the right coherency aperture if you use this function! No
+ * coherency translation is done here: __APERTURE_SYSMEM_COH has to be passed
+ * in explicitly to get sysmem_coh_mask back. If you want to simply use the
+ * default coherency then use nvgpu_aperture_mask().
+ *
+ * Returns the mask selected for @aperture. APERTURE_INVALID triggers a
+ * warning and yields 0; any value the switch does not list also yields 0.
+ */
+u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
+                          u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+        /*
+         * Reject a bad aperture before the vidmem translation below.
+         * Otherwise chips that do not honor apertures would silently turn
+         * APERTURE_INVALID into vidmem_mask and the warning would never fire.
+         */
+        if (aperture == APERTURE_INVALID) {
+                WARN_ON("Bad aperture");
+                return 0;
+        }
+        /*
+         * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
+         * "sysmem" aperture should really be translated to VIDMEM.
+         */
+        if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
+                aperture = APERTURE_VIDMEM;
+        switch (aperture) {
+        case __APERTURE_SYSMEM_COH:
+                return sysmem_coh_mask;
+        case APERTURE_SYSMEM:
+                return sysmem_mask;
+        case APERTURE_VIDMEM:
+                return vidmem_mask;
+        case APERTURE_INVALID:
+                /* Already rejected above; kept so the switch still lists it. */
+                break;
+        }
+        return 0;
+}
+/*
+ * Pick the mask matching @mem's aperture using the default coherency policy.
+ *
+ * Ideally most of the driver does not need to know the difference between
+ * coherent and non-coherent sysmem, so the translation happens here: a plain
+ * APERTURE_SYSMEM buffer is looked up as __APERTURE_SYSMEM_COH whenever
+ * NVGPU_USE_COHERENT_SYSMEM is enabled. Every other aperture is handed to
+ * __nvgpu_aperture_mask() untouched.
+ */
+u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
+                        u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
+{
+        /* Coherent sysmem in use: promote plain sysmem to its coherent twin. */
+        if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
+            mem->aperture == APERTURE_SYSMEM)
+                return __nvgpu_aperture_mask(g, __APERTURE_SYSMEM_COH,
+                                             sysmem_mask, sysmem_coh_mask,
+                                             vidmem_mask);
+        return __nvgpu_aperture_mask(g, mem->aperture,
+                                     sysmem_mask, sysmem_coh_mask, vidmem_mask);
+}
 struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
                                     struct nvgpu_sgl *sgl)
 {
author	Alex Waterman <alexw@nvidia.com>	2018-03-06 13:43:16 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-03-07 21:04:41 -0500
commit	418f31cd91a5c3ca45f0920ed64205def49c8a80 (patch)
tree	17e3e04065679788aeeff645842866df0d59ccd0 /drivers/gpu/nvgpu/common
parent	f85a0d3e00b53453f3d5ca556f15465078473f31 (diff)