From c1393d5b68e63c992f4c689cb788139fdf8c2f1a Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Thu, 11 May 2017 21:59:22 +0100
Subject: gpu: nvgpu: gmmu programming rewrite

Update the high level mapping logic. Instead of iterating over the
GPU VA, iterate over the scatter-gather table chunks. As a result,
each GMMU page table update call is simplified dramatically.

This also modifies the chip level code to no longer require an SGL
as an argument. Each call to the chip level code will be guaranteed
to be contiguous so it only has to worry about making a mapping from
virt -> phys.

This removes the dependency on Linux that the chip code currently
has. With this patch the core GMMU code still uses the Linux SGL but
the logic is highly transferable to a different, nvgpu specific,
scatter gather list format in the near future.

The last major update is to push most of the page table attribute
arguments to a struct. That struct is passed on through the various
mapping levels. This makes the function calls simpler and
easier to follow.

JIRA NVGPU-30

Change-Id: Ibb6b11755f99818fe642622ca0bd4cbed054f602
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1484104
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h      | 136 +++++++++++++++++++++++-----
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h |   6 +-
 drivers/gpu/nvgpu/include/nvgpu/vm.h        |   3 +-
 3 files changed, 117 insertions(+), 28 deletions(-)

(limited to 'drivers/gpu/nvgpu/include')

diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index ed152cd8..28a2cb82 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -38,36 +38,97 @@ enum gmmu_pgsz_gk20a {
 	gmmu_nr_page_sizes    = 3,
 };
 
-struct gk20a_mm_entry {
-	/* backing for */
-	struct nvgpu_mem mem;
-	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
-	int pgsz;
-	struct gk20a_mm_entry *entries;
-	int num_entries;
+enum gk20a_mem_rw_flag {
+	gk20a_mem_flag_none = 0,	/* RW */
+	gk20a_mem_flag_read_only = 1,	/* RO */
+	gk20a_mem_flag_write_only = 2,	/* WO */
+};
+
+/*
+ * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
+ * in the GMMU.
+ */
+struct nvgpu_gmmu_pd {
+	/*
+	 * DMA memory describing the PDEs or PTEs.
+	 */
+	struct nvgpu_mem	 mem;
+
+	/*
+	 * List of pointers to the next level of page tables. Does not
+	 * need to be populated when this PD is pointing to PTEs.
+	 */
+	struct nvgpu_gmmu_pd	*entries;
+	int			 num_entries;
+};
+
+/*
+ * Reduce the number of arguments getting passed through the various levels of
+ * GMMU mapping functions.
+ *
+ * The following fields are set statically and do not change throughout
+ * the mapping call:
+ *
+ *   pgsz:        Index into the page size table.
+ *   kind_v:      Kind attributes for mapping.
+ *   cacheable:   Cacheability of the mapping.
+ *   rw_flag:     Flag from enum gk20a_mem_rw_flag
+ *   sparse:      Set if the mapping should be sparse.
+ *   priv:        Privileged mapping.
+ *   valid:       Set if the PTE should be marked valid.
+ *   aperture:    VIDMEM or SYSMEM.
+ *   debug:       When set print debugging info.
+ *
+ * These fields are dynamically updated as necessary during the map:
+ *
+ *   ctag:        Comptag line in the comptag cache;
+ *                updated every time we write a PTE.
+ */
+struct nvgpu_gmmu_attrs {
+	u32			 pgsz;
+	u32			 kind_v;
+	u64			 ctag;
+	bool			 cacheable;
+	int			 rw_flag;
+	bool			 sparse;
+	bool			 priv;
+	bool			 valid;
+	enum nvgpu_aperture	 aperture;
+	bool			 debug;
 };
 
 struct gk20a_mmu_level {
 	int hi_bit[2];
 	int lo_bit[2];
-	int (*update_entry)(struct vm_gk20a *vm,
-			   struct gk20a_mm_entry *pte,
-			   u32 i, u32 gmmu_pgsz_idx,
-			   struct scatterlist **sgl,
-			   u64 *offset,
-			   u64 *iova,
-			   u32 kind_v, u64 *ctag,
-			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, bool priv,
-			   enum nvgpu_aperture aperture);
-	size_t entry_size;
+
+	/*
+	 * Build map from virt_addr -> phys_addr.
+	 */
+	void (*update_entry)(struct vm_gk20a *vm,
+			     const struct gk20a_mmu_level *l,
+			     struct nvgpu_gmmu_pd *pd,
+			     u32 pd_idx,
+			     u64 phys_addr,
+			     u64 virt_addr,
+			     struct nvgpu_gmmu_attrs *attrs);
+	u32 entry_size;
 };
 
-int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
-				 enum gmmu_pgsz_gk20a pgsz_idx,
-				 const struct gk20a_mmu_level *l,
-				 struct gk20a_mm_entry *entry,
-				 struct gk20a_mm_entry *prev_entry);
+static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
+{
+	switch (p) {
+	case gk20a_mem_flag_none:
+		return "RW";
+	case gk20a_mem_flag_write_only:
+		return "WO";
+	case gk20a_mem_flag_read_only:
+		return "RO";
+	default:
+		return "??";
+	}
+}
+
+int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);
 
 /**
  * nvgpu_gmmu_map - Map memory into the GMMU.
@@ -106,6 +167,33 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
 		      u64 gpu_va);
 
 void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-		     struct gk20a_mm_entry *entry);
+		     struct nvgpu_gmmu_pd *entry);
+
+/*
+ * Some useful routines that are shared across chips.
+ */
+static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
+				       u32 pd_idx)
+{
+	return (pd_idx * l->entry_size) / sizeof(u32);
+}
+
+static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
+			    size_t w, size_t data)
+{
+	nvgpu_mem_wr32(g, &pd->mem, w, data);
+}
+
+
+/*
+ * Internal debugging routines. Probably not something you want to use.
+ */
+#define pte_dbg(g, attrs, fmt, args...)					\
+	do {								\
+		if (attrs && attrs->debug)				\
+			nvgpu_info(g, fmt, ##args);			\
+		else							\
+			nvgpu_log(g, gpu_dbg_pte, fmt, ##args);		\
+	} while (0)
 
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 66d04ab8..4259d40f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -109,9 +109,9 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
 static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
 {
 	switch (aperture) {
-		case APERTURE_INVALID: return "invalid";
-		case APERTURE_SYSMEM:  return "sysmem";
-		case APERTURE_VIDMEM:  return "vidmem";
+		case APERTURE_INVALID: return "INVAL";
+		case APERTURE_SYSMEM:  return "SYSMEM";
+		case APERTURE_VIDMEM:  return "VIDMEM";
 	};
 	return "UNKNOWN";
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index f6d88cc3..255b4361 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -126,6 +126,7 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
 struct vm_gk20a {
 	struct mm_gk20a *mm;
 	struct gk20a_as_share *as_share; /* as_share this represents */
+	char name[20];
 
 	u64 va_start;
 	u64 va_limit;
@@ -145,7 +146,7 @@ struct vm_gk20a {
 
 	struct nvgpu_mutex update_gmmu_lock;
 
-	struct gk20a_mm_entry pdb;
+	struct nvgpu_gmmu_pd pdb;
 
 	/*
 	 * These structs define the address spaces. In some cases it's possible
-- 
cgit v1.2.2