gpu: nvgpu: gmmu programming rewrite

Update the high level mapping logic. Instead of iterating over the GPU VA iterate over the scatter-gather table chunks. As a result each GMMU page table update call is simplified dramatically. This also modifies the chip level code to no longer require an SGL as an argument. Each call to the chip level code will be guaranteed to be contiguous so it only has to worry about making a mapping from virt -> phys. This removes the dependency on Linux that the chip code currently has. With this patch the core GMMU code still uses the Linux SGL but the logic is highly transferable to a different, nvgpu specific, scatter gather list format in the near future. The last major update is to push most of the page table attribute arguments to a struct. That struct is passed on through the various mapping levels. This makes the function calls simpler and easier to follow. JIRA NVGPU-30 Change-Id: Ibb6b11755f99818fe642622ca0bd4cbed054f602 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master/r/1484104 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
author: Alex Waterman <alexw@nvidia.com> 2017-05-11 16:59:22 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-07-06 17:44:15 -0400
commit: c1393d5b68e63c992f4c689cb788139fdf8c2f1a (patch)
tree: 00a588d35342d75c05fed7733e91da753ba640fb /drivers/gpu/nvgpu/include
parent: 84f712dee8b582dd7d2a19345c621a2ae3bd6292 (diff)
3 files changed, 117 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index ed152cd8..28a2cb82 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -38,36 +38,97 @@ enum gmmu_pgsz_gk20a {
        gmmu_nr_page_sizes    = 3,
 };
-struct gk20a_mm_entry {
+enum gk20a_mem_rw_flag {
-        /* backing for */
+        gk20a_mem_flag_none = 0,        /* RW */
-        struct nvgpu_mem mem;
+        gk20a_mem_flag_read_only = 1,   /* RO */
-        u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
+        gk20a_mem_flag_write_only = 2,  /* WO */
-        int pgsz;
+};
-        struct gk20a_mm_entry *entries;
-        int num_entries;
+/*
+ * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
+ * in the GMMU.
+ */
+struct nvgpu_gmmu_pd {
+        /*
+         * DMA memory describing the PTEs or PTEs.
+         */
+        struct nvgpu_mem         mem;
+        /*
+         * List of pointers to the next level of page tables. Does not
+         * need to be populated when this PD is pointing to PTEs.
+         */
+        struct nvgpu_gmmu_pd    *entries;
+        int                      num_entries;
+};
+/*
+ * Reduce the number of arguments getting passed through the various levels of
+ * GMMU mapping functions.
+ *
+ * The following fields are set statically and do not change throughout
+ * mapping call:
+ *
+ *   pgsz:        Index into the page size table.
+ *   kind_v:      Kind attributes for mapping.
+ *   cacheable:   Cacheability of the mapping.
+ *   rw_flag:     Flag from enum gk20a_mem_rw_flag
+ *   sparse:      Set if the mapping should be sparse.
+ *   priv:        Privilidged mapping.
+ *   valid:       Set if the PTE should be marked valid.
+ *   aperture:    VIDMEM or SYSMEM.
+ *   debug:       When set print debugging info.
+ *
+ * These fields are dynamically updated as necessary during the map:
+ *
+ *   ctag:        Comptag line in the comptag cache;
+ *                updated every time we write a PTE.
+ */
+struct nvgpu_gmmu_attrs {
+        u32                      pgsz;
+        u32                      kind_v;
+        u64                      ctag;
+        bool                     cacheable;
+        int                      rw_flag;
+        bool                     sparse;
+        bool                     priv;
+        bool                     valid;
+        enum nvgpu_aperture      aperture;
+        bool                     debug;
 };
 struct gk20a_mmu_level {
        int hi_bit[2];
        int lo_bit[2];
-        int (*update_entry)(struct vm_gk20a *vm,
-                           struct gk20a_mm_entry *pte,
+        /*
-                           u32 i, u32 gmmu_pgsz_idx,
+         * Build map from virt_addr -> phys_addr.
-                           struct scatterlist **sgl,
+         */
-                           u64 *offset,
+        void (*update_entry)(struct vm_gk20a *vm,
-                           u64 *iova,
+                             const struct gk20a_mmu_level *l,
-                           u32 kind_v, u64 *ctag,
+                             struct nvgpu_gmmu_pd *pd,
-                           bool cacheable, bool unmapped_pte,
+                             u32 pd_idx,
-                           int rw_flag, bool sparse, bool priv,
+                             u64 phys_addr,
-                           enum nvgpu_aperture aperture);
+                             u64 virt_addr,
-        size_t entry_size;
+                             struct nvgpu_gmmu_attrs *attrs);
+        u32 entry_size;
 };
-int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
-                                 enum gmmu_pgsz_gk20a pgsz_idx,
+{
-                                 const struct gk20a_mmu_level *l,
+        switch (p) {
-                                 struct gk20a_mm_entry *entry,
+        case gk20a_mem_flag_none:
-                                 struct gk20a_mm_entry *prev_entry);
+                return "RW";
+        case gk20a_mem_flag_write_only:
+                return "WO";
+        case gk20a_mem_flag_read_only:
+                return "RO";
+        default:
+                return "??";
+        }
+}
+int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);
 /**
 * nvgpu_gmmu_map - Map memory into the GMMU.
@@ -106,6 +167,33 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
                      u64 gpu_va);
 void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-                     struct gk20a_mm_entry *entry);
+                     struct nvgpu_gmmu_pd *entry);
+/*
+ * Some useful routines that are shared across chips.
+ */
+static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
+                                       u32 pd_idx)
+{
+        return (pd_idx * l->entry_size) / sizeof(u32);
+}
+static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
+                            size_t w, size_t data)
+{
+        nvgpu_mem_wr32(g, &pd->mem, w, data);
+}
+/*
+ * Internal debugging routines. Probably not something you want to use.
+ */
+#define pte_dbg(g, attrs, fmt, args...)                                 \
+        do {                                                            \
+                if (attrs && attrs->debug)                              \
+                        nvgpu_info(g, fmt, ##args);                     \
+                else                                                    \
+                        nvgpu_log(g, gpu_dbg_pte, fmt, ##args);         \
+        } while (0)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 66d04ab8..4259d40f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -109,9 +109,9 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
 static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
 {
        switch (aperture) {
-                case APERTURE_INVALID: return "invalid";
+                case APERTURE_INVALID: return "INVAL";
-                case APERTURE_SYSMEM:  return "sysmem";
+                case APERTURE_SYSMEM:  return "SYSMEM";
-                case APERTURE_VIDMEM:  return "vidmem";
+                case APERTURE_VIDMEM:  return "VIDMEM";
        };
        return "UNKNOWN";
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index f6d88cc3..255b4361 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -126,6 +126,7 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
 struct vm_gk20a {
        struct mm_gk20a *mm;
        struct gk20a_as_share *as_share; /* as_share this represents */
+        char name[20];
        u64 va_start;
        u64 va_limit;
@@ -145,7 +146,7 @@ struct vm_gk20a {
        struct nvgpu_mutex update_gmmu_lock;
-        struct gk20a_mm_entry pdb;
+        struct nvgpu_gmmu_pd pdb;
        /*
         * These structs define the address spaces. In some cases it's possible
author	Alex Waterman <alexw@nvidia.com>	2017-05-11 16:59:22 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-06 17:44:15 -0400
commit	c1393d5b68e63c992f4c689cb788139fdf8c2f1a (patch)
tree	00a588d35342d75c05fed7733e91da753ba640fb /drivers/gpu/nvgpu/include
parent	84f712dee8b582dd7d2a19345c621a2ae3bd6292 (diff)

diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index ed152cd8..28a2cb82 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -38,36 +38,97 @@ enum gmmu_pgsz_gk20a {
38	gmmu_nr_page_sizes = 3,	38	gmmu_nr_page_sizes = 3,
39	};	39	};
40		40
41	struct gk20a_mm_entry {	41	enum gk20a_mem_rw_flag {
42	/* backing for */	42	gk20a_mem_flag_none = 0, /* RW */
43	struct nvgpu_mem mem;	43	gk20a_mem_flag_read_only = 1, /* RO */
44	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */	44	gk20a_mem_flag_write_only = 2, /* WO */
45	int pgsz;	45	};
46	struct gk20a_mm_entry *entries;	46
47	int num_entries;	47	/*
		48	* GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
		49	* in the GMMU.
		50	*/
		51	struct nvgpu_gmmu_pd {
		52	/*
		53	* DMA memory describing the PTEs or PTEs.
		54	*/
		55	struct nvgpu_mem mem;
		56
		57	/*
		58	* List of pointers to the next level of page tables. Does not
		59	* need to be populated when this PD is pointing to PTEs.
		60	*/
		61	struct nvgpu_gmmu_pd *entries;
		62	int num_entries;
		63	};
		64
		65	/*
		66	* Reduce the number of arguments getting passed through the various levels of
		67	* GMMU mapping functions.
		68	*
		69	* The following fields are set statically and do not change throughout
		70	* mapping call:
		71	*
		72	* pgsz: Index into the page size table.
		73	* kind_v: Kind attributes for mapping.
		74	* cacheable: Cacheability of the mapping.
		75	* rw_flag: Flag from enum gk20a_mem_rw_flag
		76	* sparse: Set if the mapping should be sparse.
		77	* priv: Privilidged mapping.
		78	* valid: Set if the PTE should be marked valid.
		79	* aperture: VIDMEM or SYSMEM.
		80	* debug: When set print debugging info.
		81	*
		82	* These fields are dynamically updated as necessary during the map:
		83	*
		84	* ctag: Comptag line in the comptag cache;
		85	* updated every time we write a PTE.
		86	*/
		87	struct nvgpu_gmmu_attrs {
		88	u32 pgsz;
		89	u32 kind_v;
		90	u64 ctag;
		91	bool cacheable;
		92	int rw_flag;
		93	bool sparse;
		94	bool priv;
		95	bool valid;
		96	enum nvgpu_aperture aperture;
		97	bool debug;
48	};	98	};
49		99
50	struct gk20a_mmu_level {	100	struct gk20a_mmu_level {
51	int hi_bit[2];	101	int hi_bit[2];
52	int lo_bit[2];	102	int lo_bit[2];
53	int (*update_entry)(struct vm_gk20a *vm,	103
54	struct gk20a_mm_entry *pte,	104	/*
55	u32 i, u32 gmmu_pgsz_idx,	105	* Build map from virt_addr -> phys_addr.
56	struct scatterlist **sgl,	106	*/
57	u64 *offset,	107	void (*update_entry)(struct vm_gk20a *vm,
58	u64 *iova,	108	const struct gk20a_mmu_level *l,
59	u32 kind_v, u64 *ctag,	109	struct nvgpu_gmmu_pd *pd,
60	bool cacheable, bool unmapped_pte,	110	u32 pd_idx,
61	int rw_flag, bool sparse, bool priv,	111	u64 phys_addr,
62	enum nvgpu_aperture aperture);	112	u64 virt_addr,
63	size_t entry_size;	113	struct nvgpu_gmmu_attrs *attrs);
		114	u32 entry_size;
64	};	115	};
65		116
66	int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,	117	static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
67	enum gmmu_pgsz_gk20a pgsz_idx,	118	{
68	const struct gk20a_mmu_level *l,	119	switch (p) {
69	struct gk20a_mm_entry *entry,	120	case gk20a_mem_flag_none:
70	struct gk20a_mm_entry *prev_entry);	121	return "RW";
		122	case gk20a_mem_flag_write_only:
		123	return "WO";
		124	case gk20a_mem_flag_read_only:
		125	return "RO";
		126	default:
		127	return "??";
		128	}
		129	}
		130
		131	int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);
71		132
72	/**	133	/**
73	* nvgpu_gmmu_map - Map memory into the GMMU.	134	* nvgpu_gmmu_map - Map memory into the GMMU.
@@ -106,6 +167,33 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
106	u64 gpu_va);	167	u64 gpu_va);
107		168
108	void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,	169	void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
109	struct gk20a_mm_entry *entry);	170	struct nvgpu_gmmu_pd *entry);
		171
		172	/*
		173	* Some useful routines that are shared across chips.
		174	*/
		175	static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
		176	u32 pd_idx)
		177	{
		178	return (pd_idx * l->entry_size) / sizeof(u32);
		179	}
		180
		181	static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
		182	size_t w, size_t data)
		183	{
		184	nvgpu_mem_wr32(g, &pd->mem, w, data);
		185	}
		186
		187
		188	/*
		189	* Internal debugging routines. Probably not something you want to use.
		190	*/
		191	#define pte_dbg(g, attrs, fmt, args...) \
		192	do { \
		193	if (attrs && attrs->debug) \
		194	nvgpu_info(g, fmt, ##args); \
		195	else \
		196	nvgpu_log(g, gpu_dbg_pte, fmt, ##args); \
		197	} while (0)
110		198
111	#endif	199	#endif


diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 66d04ab8..4259d40f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -109,9 +109,9 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
109	static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)	109	static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
110	{	110	{
111	switch (aperture) {	111	switch (aperture) {
112	case APERTURE_INVALID: return "invalid";	112	case APERTURE_INVALID: return "INVAL";
113	case APERTURE_SYSMEM: return "sysmem";	113	case APERTURE_SYSMEM: return "SYSMEM";
114	case APERTURE_VIDMEM: return "vidmem";	114	case APERTURE_VIDMEM: return "VIDMEM";
115	};	115	};
116	return "UNKNOWN";	116	return "UNKNOWN";
117	}	117	}


diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index f6d88cc3..255b4361 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -126,6 +126,7 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
126	struct vm_gk20a {	126	struct vm_gk20a {
127	struct mm_gk20a *mm;	127	struct mm_gk20a *mm;
128	struct gk20a_as_share *as_share; /* as_share this represents */	128	struct gk20a_as_share *as_share; /* as_share this represents */
		129	char name[20];
129		130
130	u64 va_start;	131	u64 va_start;
131	u64 va_limit;	132	u64 va_limit;
@@ -145,7 +146,7 @@ struct vm_gk20a {
145		146
146	struct nvgpu_mutex update_gmmu_lock;	147	struct nvgpu_mutex update_gmmu_lock;
147		148
148	struct gk20a_mm_entry pdb;	149	struct nvgpu_gmmu_pd pdb;
149		150
150	/*	151	/*
151	* These structs define the address spaces. In some cases it's possible	152	* These structs define the address spaces. In some cases it's possible