Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/nvgpu_mem.c  613
1 file changed, 613 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
new file mode 100644
index 00000000..93925803
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
@@ -0,0 +1,613 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include <linux/vmalloc.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "platform_gk20a.h"

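/*
 * Resolve the physical address backing an SGL entry. On platforms that
 * provide a phys_addr() hook (e.g. virtualized configurations) the
 * intermediate physical address (IPA) reported by the scatterlist is
 * translated to the real physical address; otherwise the IPA is returned
 * as-is.
 */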
static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	u64 ipa = sg_phys((struct scatterlist *)sgl);

	if (platform->phys_addr)
		return platform->phys_addr(g, ipa);

	return ipa;
}

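/*
 * Make sure a SYSMEM nvgpu_mem has a kernel CPU mapping before it is accessed
 * through the nvgpu_mem_rd*()/nvgpu_mem_wr*() accessors. Buffers allocated
 * without NVGPU_DMA_NO_KERNEL_MAPPING (and coherent-sysmem buffers) already
 * carry a mapping made by the DMA API, so this is a no-op for them; otherwise
 * a writecombined vmap() is created. Returns -EBUSY on nested calls and
 * -ENOMEM if the vmap() fails.
 *
 * Typical usage (illustrative sketch only):
 *
 *	err = nvgpu_mem_begin(g, mem);
 *	if (err)
 *		return err;
 *	nvgpu_mem_wr32(g, mem, 0, data);
 *	nvgpu_mem_end(g, mem);
 */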
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;
	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
		PAGE_KERNEL :
		pgprot_writecombine(PAGE_KERNEL);

	if (mem->aperture != APERTURE_SYSMEM)
		return 0;

	/*
	 * WAR for bug 2040115: a coherent vmap() is already made for all
	 * sysmem buffers. The prot settings are left alone since eventually
	 * this should be deleted.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
	 * another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
			0, prot);

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM)
		return;

	/*
	 * WAR for bug 2040115: skip this since the map will be taken care of
	 * during the free in the DMA API.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
	 * already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}

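/*
 * Batch-read callback for nvgpu_pramin_access_batched(): copies 'words'
 * 32-bit words out of the BAR0 PRAMIN window starting at register offset
 * 'start' into the buffer pointed to by *arg, advancing *arg as it goes.
 */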
static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
				     u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

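/*
 * Read the w-th 32-bit word of an nvgpu_mem. SYSMEM buffers are read through
 * the kernel CPU mapping (see nvgpu_mem_begin()); VIDMEM buffers are read via
 * the PRAMIN window using the batched accessor above.
 */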
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

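/*
 * Bulk read: copy 'size' bytes starting at byte 'offset' of the nvgpu_mem
 * into 'dest'. Both 'offset' and 'size' must be 4-byte aligned.
 */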
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

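/*
 * Batch-write callback for nvgpu_pramin_access_batched(): writes 'words'
 * 32-bit words from *arg into the PRAMIN window starting at register offset
 * 'start', advancing *arg as it goes.
 */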
static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
				     u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

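/*
 * Write 'data' to the w-th 32-bit word of an nvgpu_mem. For VIDMEM the write
 * goes through the PRAMIN window and is followed by a wmb() unless
 * mem->skip_wmb is set.
 */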
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

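/*
 * Batch-fill callback for nvgpu_pramin_access_batched(): writes the single
 * 32-bit pattern **arg into 'words' consecutive words of the PRAMIN window
 * starting at register offset 'start'.
 */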
static void pramin_access_batch_set(struct gk20a *g, u32 start,
				    u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

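/*
 * Fill 'size' bytes of the nvgpu_mem at byte 'offset' with the byte value
 * 'c'. For VIDMEM the byte is replicated into a 32-bit pattern and written
 * through the PRAMIN batch-set helper above.
 */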
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
				dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
	    !nvgpu_iommuable(g))
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with > 1 chunks */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
}

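/*
 * Create a shadow nvgpu_mem in 'dest' that aliases 'nr_pages' pages of 'src'
 * starting at 'start_page'. No new memory is allocated: the page list and
 * (where available) the CPU mapping of 'src' are re-used, and a new SG table
 * is built for the sub-range. Only SYSMEM sources are supported.
 */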
/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
			      int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 *
	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
	 * re-use.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}

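/*
 * Build a SYSMEM nvgpu_mem in 'dest' from an existing array of struct page
 * pointers. The page list is copied, an SG table is constructed for it, and
 * the DMA address is forced to 0 so the resulting buffer bypasses the SMMU
 * when mapped into the GMMU.
 */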
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
				  struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
					 nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
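/*
 * Wrap a physically contiguous range starting at 'src_phys' and spanning
 * 'nr_pages' pages into a SYSMEM nvgpu_mem by converting it to a temporary
 * page array and handing it to __nvgpu_mem_create_from_pages().
 */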
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
				 u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}
#endif

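/*
 * nvgpu_sgt_ops implementation backed by Linux scatterlists. These thin
 * wrappers let common (OS-agnostic) nvgpu code walk a Linux SGL through the
 * generic nvgpu_sgt interface; see nvgpu_linux_sgt_ops below.
 */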
static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	return (u64)__nvgpu_sgl_phys(g, sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
					struct nvgpu_sgl *sgl,
					struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
				__nvgpu_sgl_phys(g, sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
					  struct nvgpu_sgt *sgt)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
		return false;
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};

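/*
 * For a VIDMEM buffer the scatterlist does not describe system pages; its DMA
 * address identifies the vidmem page allocation instead. Look that allocation
 * up and return its embedded nvgpu_sgt rather than wrapping the Linux SGL.
 */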
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

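/*
 * Create an nvgpu_sgt wrapper around a Linux sg_table. VIDMEM buffers return
 * the nvgpu_sgt embedded in their page allocation; SYSMEM buffers get a
 * freshly allocated nvgpu_sgt that points at the Linux SGL and uses
 * nvgpu_linux_sgt_ops.
 */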
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					    struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}