diff options
author | Kevin Huang <kevinh@nvidia.com> | 2014-04-14 19:53:09 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:59 -0400 |
commit | da56b17de0b360cb020a8831ba82c202057c2968 (patch) | |
tree | 7e759f5ca412775001b00cc4211515c37b3c35ed /drivers/gpu | |
parent | d364553f7c94b4caa649f5e83f709c1c31bc45bb (diff) |
gpu: nvgpu: add generic API for sparse memory
Bug 1442531
Change-Id: I97408b54e27f5ed6411792e73f079a6f86cbe5f6
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 42 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 29 |
4 files changed, 54 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 6d3c8225..081ec077 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -211,6 +211,10 @@ struct gpu_ops { | |||
211 | int (*get_netlist_name)(int index, char *name); | 211 | int (*get_netlist_name)(int index, char *name); |
212 | bool (*is_fw_defined)(void); | 212 | bool (*is_fw_defined)(void); |
213 | } gr_ctx; | 213 | } gr_ctx; |
214 | struct { | ||
215 | int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr, | ||
216 | u32 num_pages, u32 pgsz_idx); | ||
217 | } mm; | ||
214 | }; | 218 | }; |
215 | 219 | ||
216 | struct gk20a { | 220 | struct gk20a { |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index f8a997e2..de6c3713 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a_gating_reglist.h" | 22 | #include "gk20a_gating_reglist.h" |
23 | #include "channel_gk20a.h" | 23 | #include "channel_gk20a.h" |
24 | #include "gr_ctx_gk20a.h" | 24 | #include "gr_ctx_gk20a.h" |
25 | #include "mm_gk20a.h" | ||
25 | 26 | ||
26 | struct gpu_ops gk20a_ops = { | 27 | struct gpu_ops gk20a_ops = { |
27 | .clock_gating = { | 28 | .clock_gating = { |
@@ -46,6 +47,7 @@ int gk20a_init_hal(struct gpu_ops *gops) | |||
46 | gk20a_init_fb(gops); | 47 | gk20a_init_fb(gops); |
47 | gk20a_init_fifo(gops); | 48 | gk20a_init_fifo(gops); |
48 | gk20a_init_gr_ctx(gops); | 49 | gk20a_init_gr_ctx(gops); |
50 | gk20a_init_mm(gops); | ||
49 | gops->name = "gk20a"; | 51 | gops->name = "gk20a"; |
50 | 52 | ||
51 | return 0; | 53 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ebd829d3..dd492d92 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/tegra-soc.h> | 29 | #include <linux/tegra-soc.h> |
30 | #include <linux/vmalloc.h> | 30 | #include <linux/vmalloc.h> |
31 | #include <linux/dma-buf.h> | 31 | #include <linux/dma-buf.h> |
32 | #include <asm/cacheflush.h> | ||
33 | 32 | ||
34 | #include "gk20a.h" | 33 | #include "gk20a.h" |
35 | #include "mm_gk20a.h" | 34 | #include "mm_gk20a.h" |
@@ -43,11 +42,6 @@ | |||
43 | 42 | ||
44 | #include "kind_gk20a.h" | 43 | #include "kind_gk20a.h" |
45 | 44 | ||
46 | #ifdef CONFIG_ARM64 | ||
47 | #define outer_flush_range(a, b) | ||
48 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
49 | #endif | ||
50 | |||
51 | /* | 45 | /* |
52 | * GPU mapping life cycle | 46 | * GPU mapping life cycle |
53 | * ====================== | 47 | * ====================== |
@@ -93,12 +87,6 @@ static inline u32 lo32(u64 f) | |||
93 | return (u32)(f & 0xffffffff); | 87 | return (u32)(f & 0xffffffff); |
94 | } | 88 | } |
95 | 89 | ||
96 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
97 | do { \ | ||
98 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
99 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
100 | } while (0) | ||
101 | |||
102 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | 90 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); |
103 | static struct mapped_buffer_node *find_mapped_buffer_locked( | 91 | static struct mapped_buffer_node *find_mapped_buffer_locked( |
104 | struct rb_root *root, u64 addr); | 92 | struct rb_root *root, u64 addr); |
@@ -463,7 +451,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | |||
463 | kfree(sgt); | 451 | kfree(sgt); |
464 | } | 452 | } |
465 | 453 | ||
466 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | 454 | int map_gmmu_pages(void *handle, struct sg_table *sgt, |
467 | void **va, size_t size) | 455 | void **va, size_t size) |
468 | { | 456 | { |
469 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | 457 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); |
@@ -471,7 +459,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt, | |||
471 | return 0; | 459 | return 0; |
472 | } | 460 | } |
473 | 461 | ||
474 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | 462 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) |
475 | { | 463 | { |
476 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | 464 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); |
477 | } | 465 | } |
@@ -571,7 +559,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | |||
571 | iova = 0; | 559 | iova = 0; |
572 | } | 560 | } |
573 | 561 | ||
574 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | 562 | int map_gmmu_pages(void *handle, struct sg_table *sgt, |
575 | void **kva, size_t size) | 563 | void **kva, size_t size) |
576 | { | 564 | { |
577 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | 565 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; |
@@ -590,7 +578,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt, | |||
590 | return 0; | 578 | return 0; |
591 | } | 579 | } |
592 | 580 | ||
593 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | 581 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) |
594 | { | 582 | { |
595 | gk20a_dbg_fn(""); | 583 | gk20a_dbg_fn(""); |
596 | 584 | ||
@@ -605,7 +593,7 @@ static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | |||
605 | * the whole range is zeroed so it's "invalid"/will fault | 593 | * the whole range is zeroed so it's "invalid"/will fault |
606 | */ | 594 | */ |
607 | 595 | ||
608 | static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | 596 | int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, |
609 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | 597 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, |
610 | struct page_table_gk20a *pte) | 598 | struct page_table_gk20a *pte) |
611 | { | 599 | { |
@@ -635,7 +623,7 @@ static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | |||
635 | } | 623 | } |
636 | 624 | ||
637 | /* given address range (inclusive) determine the pdes crossed */ | 625 | /* given address range (inclusive) determine the pdes crossed */ |
638 | static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | 626 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, |
639 | u64 addr_lo, u64 addr_hi, | 627 | u64 addr_lo, u64 addr_hi, |
640 | u32 *pde_lo, u32 *pde_hi) | 628 | u32 *pde_lo, u32 *pde_hi) |
641 | { | 629 | { |
@@ -647,12 +635,12 @@ static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
647 | *pde_lo, *pde_hi); | 635 | *pde_lo, *pde_hi); |
648 | } | 636 | } |
649 | 637 | ||
650 | static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | 638 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i) |
651 | { | 639 | { |
652 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); | 640 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); |
653 | } | 641 | } |
654 | 642 | ||
655 | static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 643 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
656 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) | 644 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) |
657 | { | 645 | { |
658 | u32 ret; | 646 | u32 ret; |
@@ -686,7 +674,7 @@ static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, | |||
686 | * backing store and if not go ahead allocate it and | 674 | * backing store and if not go ahead allocate it and |
687 | * record it in the appropriate pde | 675 | * record it in the appropriate pde |
688 | */ | 676 | */ |
689 | static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | 677 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, |
690 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 678 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
691 | { | 679 | { |
692 | int err; | 680 | int err; |
@@ -1981,6 +1969,7 @@ err_unmap: | |||
1981 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | 1969 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) |
1982 | { | 1970 | { |
1983 | struct vm_gk20a *vm = mapped_buffer->vm; | 1971 | struct vm_gk20a *vm = mapped_buffer->vm; |
1972 | struct gk20a *g = vm->mm->g; | ||
1984 | 1973 | ||
1985 | if (mapped_buffer->va_node && | 1974 | if (mapped_buffer->va_node && |
1986 | mapped_buffer->va_node->sparse) { | 1975 | mapped_buffer->va_node->sparse) { |
@@ -1990,7 +1979,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
1990 | gmmu_page_shifts[pgsz_idx]; | 1979 | gmmu_page_shifts[pgsz_idx]; |
1991 | 1980 | ||
1992 | /* there is little we can do if this fails... */ | 1981 | /* there is little we can do if this fails... */ |
1993 | gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx); | 1982 | g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx); |
1994 | 1983 | ||
1995 | } else | 1984 | } else |
1996 | __locked_gmmu_unmap(vm, | 1985 | __locked_gmmu_unmap(vm, |
@@ -2259,6 +2248,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2259 | u32 start_page_nr; | 2248 | u32 start_page_nr; |
2260 | struct gk20a_allocator *vma; | 2249 | struct gk20a_allocator *vma; |
2261 | struct vm_gk20a *vm = as_share->vm; | 2250 | struct vm_gk20a *vm = as_share->vm; |
2251 | struct gk20a *g = vm->mm->g; | ||
2262 | struct vm_reserved_va_node *va_node; | 2252 | struct vm_reserved_va_node *va_node; |
2263 | u64 vaddr_start = 0; | 2253 | u64 vaddr_start = 0; |
2264 | 2254 | ||
@@ -2316,7 +2306,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2316 | 2306 | ||
2317 | /* mark that we need to use sparse mappings here */ | 2307 | /* mark that we need to use sparse mappings here */ |
2318 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { | 2308 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { |
2319 | err = gk20a_vm_put_empty(vm, vaddr_start, args->pages, | 2309 | err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages, |
2320 | pgsz_idx); | 2310 | pgsz_idx); |
2321 | if (err) { | 2311 | if (err) { |
2322 | mutex_unlock(&vm->update_gmmu_lock); | 2312 | mutex_unlock(&vm->update_gmmu_lock); |
@@ -3076,3 +3066,9 @@ int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g) | |||
3076 | gk20a_idle(g->dev); | 3066 | gk20a_idle(g->dev); |
3077 | return ret; | 3067 | return ret; |
3078 | } | 3068 | } |
3069 | |||
3070 | void gk20a_init_mm(struct gpu_ops *gops) | ||
3071 | { | ||
3072 | gops->mm.set_sparse = gk20a_vm_put_empty; | ||
3073 | } | ||
3074 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7f86710b..b0f3536e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/dma-attrs.h> | 25 | #include <linux/dma-attrs.h> |
26 | #include <linux/iommu.h> | 26 | #include <linux/iommu.h> |
27 | #include <asm/dma-iommu.h> | 27 | #include <asm/dma-iommu.h> |
28 | #include <asm/cacheflush.h> | ||
28 | #include "gk20a_allocator.h" | 29 | #include "gk20a_allocator.h" |
29 | 30 | ||
30 | /* This "address bit" in the gmmu ptes (and other gk20a accesses) | 31 | /* This "address bit" in the gmmu ptes (and other gk20a accesses) |
@@ -41,6 +42,17 @@ | |||
41 | #define NV_GMMU_VA_RANGE 35ULL | 42 | #define NV_GMMU_VA_RANGE 35ULL |
42 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) | 43 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) |
43 | 44 | ||
45 | #ifdef CONFIG_ARM64 | ||
46 | #define outer_flush_range(a, b) | ||
47 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
48 | #endif | ||
49 | |||
50 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
51 | do { \ | ||
52 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
53 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
54 | } while (0) | ||
55 | |||
44 | struct mem_desc { | 56 | struct mem_desc { |
45 | struct dma_buf *ref; | 57 | struct dma_buf *ref; |
46 | struct sg_table *sgt; | 58 | struct sg_table *sgt; |
@@ -463,4 +475,21 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, | |||
463 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); | 475 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); |
464 | 476 | ||
465 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | 477 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); |
478 | |||
479 | int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
480 | void **va, size_t size); | ||
481 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va); | ||
482 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, | ||
483 | u64 addr_lo, u64 addr_hi, | ||
484 | u32 *pde_lo, u32 *pde_hi); | ||
485 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i); | ||
486 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
487 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); | ||
488 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
489 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); | ||
490 | int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | ||
491 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | ||
492 | struct page_table_gk20a *pte); | ||
493 | struct gpu_ops; | ||
494 | void gk20a_init_mm(struct gpu_ops *gops); | ||
466 | #endif /*_MM_GK20A_H_ */ | 495 | #endif /*_MM_GK20A_H_ */ |