From 5a35a95654d561fce09a3b9abf6b82bb7a29d74b Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 28 Feb 2018 09:19:19 -0800 Subject: Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working" Also revert other changes related to IO coherence. This may be the culprit in a recent dev-kernel lockdown. Bug 2070609 Change-Id: Ida178aef161fadbc6db9512521ea51c702c1564b Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1665914 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Srikar Srimath Tirumala --- drivers/gpu/nvgpu/common/linux/dma.c | 34 ++------------------ drivers/gpu/nvgpu/common/linux/module.c | 8 ----- drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 51 ++++++++++++++++-------------- drivers/gpu/nvgpu/common/linux/pci.c | 17 +++++----- drivers/gpu/nvgpu/common/linux/vm.c | 3 +- drivers/gpu/nvgpu/common/mm/gmmu.c | 16 +--------- drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 46 --------------------------- 7 files changed, 41 insertions(+), 134 deletions(-) (limited to 'drivers/gpu/nvgpu/common') diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index 81aebb7d..c13dae8b 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -221,16 +221,6 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, NVGPU_DEFINE_DMA_ATTRS(dma_attrs); void *alloc_ret; - /* - * WAR for IO coherent chips: the DMA API does not seem to generate - * mappings that work correctly. Unclear why - Bug ID: 2040115. - * - * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING - * and then make a vmap() ourselves. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - flags |= NVGPU_DMA_NO_KERNEL_MAPPING; - /* * Before the debug print so we see this in the total. But during * cleanup in the fail path this has to be subtracted. @@ -265,17 +255,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, iova, size, flags); } if (err) - goto fail_free_dma; - - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { - mem->cpu_va = vmap(mem->priv.pages, - size >> PAGE_SHIFT, - 0, PAGE_KERNEL); - if (!mem->cpu_va) { - err = -ENOMEM; - goto fail_free_sgt; - } - } + goto fail_free; mem->aligned_size = size; mem->aperture = APERTURE_SYSMEM; @@ -285,14 +265,12 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, return 0; -fail_free_sgt: - nvgpu_free_sgtable(g, &mem->priv.sgt); -fail_free_dma: +fail_free: + g->dma_memory_used -= mem->aligned_size; dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); mem->cpu_va = NULL; mem->priv.sgt = NULL; mem->size = 0; - g->dma_memory_used -= mem->aligned_size; return err; } @@ -488,12 +466,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && (mem->cpu_va || mem->priv.pages)) { - /* - * Free side of WAR for bug 2040115. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - vunmap(mem->cpu_va); - if (mem->priv.flags) { NVGPU_DEFINE_DMA_ATTRS(dma_attrs); diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 741c86e7..b103fcea 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -1108,7 +1107,6 @@ static int gk20a_probe(struct platform_device *dev) struct gk20a *gk20a; int err; struct gk20a_platform *platform = NULL; - struct device_node *np; if (dev->dev.of_node) { const struct of_device_id *match; @@ -1149,12 +1147,6 @@ static int gk20a_probe(struct platform_device *dev) if (err) goto return_err; - np = nvgpu_get_node(gk20a); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); - } - if (nvgpu_platform_is_simulation(gk20a)) __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index 69897694..206b83e1 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -34,24 +34,39 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, + u32 sysmem_mask, u32 vidmem_mask) +{ + switch (aperture) { + case APERTURE_SYSMEM: + /* some igpus consider system memory vidmem */ + return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) + ? sysmem_mask : vidmem_mask; + case APERTURE_VIDMEM: + /* for dgpus only */ + return vidmem_mask; + case APERTURE_INVALID: + WARN_ON("Bad aperture"); + } + return 0; +} + +u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, + u32 sysmem_mask, u32 vidmem_mask) +{ + return __nvgpu_aperture_mask(g, mem->aperture, + sysmem_mask, vidmem_mask); +} + int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) { void *cpu_va; - pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? - PAGE_KERNEL : + pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : pgprot_writecombine(PAGE_KERNEL); if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) return 0; - /* - * WAR for bug 2040115: we already will always have a coherent vmap() - * for all sysmem buffers. The prot settings are left alone since - * eventually this should be deleted. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return 0; - /* * A CPU mapping is implicitly made for all SYSMEM DMA allocations that * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make @@ -81,13 +96,6 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) return; - /* - * WAR for bug 2040115: skip this since the map will be taken care of - * during the free in the DMA API. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return; - /* * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping * already made by the DMA API. @@ -307,8 +315,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, */ u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) { - if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || - !nvgpu_iommuable(g)) + if (!nvgpu_iommuable(g)) return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); if (sg_dma_address(sgl) == 0) @@ -408,12 +415,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, /* * Re-use the CPU mapping only if the mapping was made by the DMA API. - * - * Bug 2040115: the DMA API wrapper makes the mapping that we should - * re-use. */ - if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || - nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); dest->priv.pages = src->priv.pages + start_page; diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 973da9ca..6ebe8dda 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c @@ -17,13 +17,13 @@ #include #include #include -#include -#include #include #include #include #include +#include +#include #include "gk20a/gk20a.h" #include "clk/clk.h" @@ -566,12 +566,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, platform->g = g; l->dev = &pdev->dev; - np = nvgpu_get_node(g); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); - } - err = pci_enable_device(pdev); if (err) return err; @@ -650,6 +644,13 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, g->mm.has_physical_mode = false; + np = nvgpu_get_node(g); + + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); + } + return 0; } diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 52b2f30c..e3ca4eda 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -166,8 +166,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, vm_aspace_id(vm), mapped_buffer->flags, - nvgpu_aperture_str(g, - gk20a_dmabuf_aperture(g, os_buf->dmabuf))); + nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); return mapped_buffer; } diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 41343718..ffac324c 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -79,13 +79,6 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, if (!sgt) return -ENOMEM; - /* - * If the GPU is IO coherent and the DMA API is giving us IO coherent - * CPU mappings then we gotta make sure we use the IO coherent aperture. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - flags |= NVGPU_VM_MAP_IO_COHERENT; - nvgpu_mutex_acquire(&vm->update_gmmu_lock); vaddr = g->ops.mm.gmmu_map(vm, addr, sgt, /* sg list */ @@ -634,7 +627,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), attrs->kind_v, - nvgpu_aperture_str(g, attrs->aperture), + nvgpu_aperture_str(attrs->aperture), attrs->cacheable ? 'C' : '-', attrs->sparse ? 'S' : '-', attrs->priv ? 'P' : '-', @@ -711,13 +704,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); - /* - * Handle the IO coherency aperture: make sure the .aperture field is - * correct based on the IO coherency flag. - */ - if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM) - attrs.aperture = __APERTURE_SYSMEM_COH; - /* * Only allocate a new GPU VA range if we haven't already been passed a * GPU VA range. This facilitates fixed mappings. diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 78a57b4e..73b6b2a7 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -28,52 +28,6 @@ #include "gk20a/gk20a.h" -/* - * Make sure to use the right coherency aperture if you use this function! This - * will not add any checks. If you want to simply use the default coherency then - * use nvgpu_aperture_mask(). - */ -u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, - u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) -{ - /* - * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the - * "sysmem" aperture should really be translated to VIDMEM. - */ - if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) - aperture = APERTURE_VIDMEM; - - switch (aperture) { - case __APERTURE_SYSMEM_COH: - return sysmem_coh_mask; - case APERTURE_SYSMEM: - return sysmem_mask; - case APERTURE_VIDMEM: - return vidmem_mask; - case APERTURE_INVALID: - WARN_ON("Bad aperture"); - } - return 0; -} - -u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, - u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) -{ - enum nvgpu_aperture ap = mem->aperture; - - /* - * Handle the coherent aperture: ideally most of the driver is not - * aware of the difference between coherent and non-coherent sysmem so - * we add this translation step here. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) && - ap == APERTURE_SYSMEM) - ap = __APERTURE_SYSMEM_COH; - - return __nvgpu_aperture_mask(g, ap, - sysmem_mask, sysmem_coh_mask, vidmem_mask); -} - void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) { return sgt->ops->sgl_next(sgl); -- cgit v1.2.2