Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c  594
1 file changed, 594 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
new file mode 100644
index 00000000..2587d56a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -0,0 +1,594 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

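/*
 * Translate an nvgpu_aperture into the caller-supplied HW mask: SYSMEM
 * buffers get sysmem_mask, VIDMEM buffers get vidmem_mask. On iGPUs that do
 * not set NVGPU_MM_HONORS_APERTURE, SYSMEM is reported with the vidmem mask.
 */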
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		u32 sysmem_mask, u32 vidmem_mask)
{
	switch (aperture) {
	case APERTURE_SYSMEM:
		/* some igpus consider system memory vidmem */
		return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
			? sysmem_mask : vidmem_mask;
	case APERTURE_VIDMEM:
		/* for dgpus only */
		return vidmem_mask;
	case APERTURE_INVALID:
		WARN_ON("Bad aperture");
	}
	return 0;
}

u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		u32 sysmem_mask, u32 vidmem_mask)
{
	return __nvgpu_aperture_mask(g, mem->aperture,
			sysmem_mask, vidmem_mask);
}

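/*
 * Make a SYSMEM buffer CPU-accessible before using nvgpu_mem_rd*()/wr*().
 * Only buffers allocated with NVGPU_DMA_NO_KERNEL_MAPPING need this; all
 * other buffers are either already kernel-mapped or accessed via PRAMIN.
 * Pair every successful call with nvgpu_mem_end(). Illustrative usage
 * (error handling elided):
 *
 *	if (!nvgpu_mem_begin(g, mem)) {
 *		nvgpu_mem_wr32(g, mem, 0, 0xcafe);
 *		nvgpu_mem_end(g, mem);
 *	}
 */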
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;

	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
	 * another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
			0, pgprot_writecombine(PAGE_KERNEL));

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

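/* Tear down the temporary CPU mapping created by nvgpu_mem_begin(). */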
void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
	 * already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}

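/*
 * PRAMIN batch callback: read @words 32-bit words from the PRAMIN window at
 * register offset @start into the buffer *@arg, advancing *@arg past the
 * data that was read.
 */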
static void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

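/*
 * Read one 32-bit word at word index @w (i.e. byte offset w * 4). SYSMEM is
 * read through the CPU mapping; VIDMEM (or force_pramin) goes through the
 * batched PRAMIN path.
 */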
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

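/* Byte-offset variant of nvgpu_mem_rd32(); @offset must be word aligned. */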
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

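/*
 * PRAMIN batch callback: write @words 32-bit words from *@arg into the
 * PRAMIN window at register offset @start, advancing *@arg as the data is
 * consumed.
 */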
static void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

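/*
 * Write one 32-bit word at word index @w, mirroring nvgpu_mem_rd32(). VIDMEM
 * writes go through PRAMIN and are followed by a wmb() unless mem->skip_wmb
 * is set.
 */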
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

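/* Byte-offset variant of nvgpu_mem_wr32(); @offset must be word aligned. */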
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
					dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

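/*
 * PRAMIN batch callback: fill @words 32-bit words of the PRAMIN window at
 * register offset @start with the repeat value **@arg. Unlike the rd/wr
 * callbacks, the argument pointer is not advanced.
 */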
static void pramin_access_batch_set(struct gk20a *g, u32 start,
		u32 words, u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

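/*
 * memset() analogue for nvgpu_mem buffers: fill @size bytes at @offset with
 * the byte value @c (only the low byte of @c is used). @offset and @size
 * must be word aligned.
 */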
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			gk20a_dbg(gpu_dbg_mem, " %p = 0x%x [times %d]",
					dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);

	if (!device_is_iommuable(l->dev))
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from %mem, assuming this is a SYSMEM
 * allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous SYSMEM buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with > 1 chunks */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return sg_phys(mem->priv.sgt->sgl);
}

/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *dest, struct nvgpu_mem *src,
		int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}

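/*
 * Wrap an existing array of kernel pages in a SYSMEM nvgpu_mem. The page
 * array is copied, so the caller keeps ownership of @pages. The resulting
 * buffer is flagged __NVGPU_MEM_FLAG_NO_DMA and its DMA address is forced to
 * 0 so that mappings bypass the SMMU.
 */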
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
		struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
				nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

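/*
 * Same as __nvgpu_mem_create_from_pages(), but starting from a physical
 * address: wraps @nr_pages pages beginning at @src_phys.
 */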
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
		u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}

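/*
 * nvgpu_sgt_ops implementation backed by a Linux scatterlist: each callback
 * simply forwards to the corresponding sg_*() accessor on the wrapped
 * struct scatterlist.
 */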
static void *nvgpu_mem_linux_sgl_next(void *sgl)
{
	return sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
{
	return (u64)sg_phys((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(void *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
		struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
				sg_phys((struct scatterlist *)sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
		struct nvgpu_sgt *sgt)
{
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};

static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

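/*
 * Wrap a Linux sg_table in an nvgpu_sgt. VIDMEM buffers carry a vidmem page
 * alloc handle in the DMA address, so for those the allocator's own SGT is
 * returned instead of allocating a new wrapper.
 */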
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = sgt->sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

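/* Convenience wrapper: create an nvgpu_sgt from the SGT embedded in @mem. */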
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}