Diffstat (limited to 'include/nvgpu/gmmu.h')
-rw-r--r--	include/nvgpu/gmmu.h	369
1 file changed, 369 insertions, 0 deletions
diff --git a/include/nvgpu/gmmu.h b/include/nvgpu/gmmu.h
new file mode 100644
index 0000000..2fc0d44
--- /dev/null
+++ b/include/nvgpu/gmmu.h
@@ -0,0 +1,369 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GMMU_H
#define NVGPU_GMMU_H

#include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
#include <nvgpu/bitops.h>
#include <nvgpu/mm.h>

/*
 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
 * API supports all the different types of mappings that might be done in the
 * GMMU.
 */

struct vm_gk20a;
struct nvgpu_mem;

#define GMMU_PAGE_SIZE_SMALL	0U
#define GMMU_PAGE_SIZE_BIG	1U
#define GMMU_PAGE_SIZE_KERNEL	2U
#define GMMU_NR_PAGE_SIZES	3U

enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,	/* RW */
	gk20a_mem_flag_read_only = 1,	/* RO */
	gk20a_mem_flag_write_only = 2,	/* WO */
};

/*
 * Minimum size of a cache. The number of different caches in the
 * nvgpu_pd_cache structure depends on this. The MIN_SHIFT define is the
 * number of bits to shift to determine which list to use in the array of
 * lists.
 *
 * For Linux, limit the use of the cache to entries smaller than the page
 * size, to avoid potential problems with running out of CMA memory when
 * allocating large, contiguous slabs, as would be required for
 * non-IOMMU-able chips.
 */
#define NVGPU_PD_CACHE_MIN		256U
#define NVGPU_PD_CACHE_MIN_SHIFT	9U

#ifdef __KERNEL__

#if PAGE_SIZE == 4096
#define NVGPU_PD_CACHE_COUNT		4U
#elif PAGE_SIZE == 65536
#define NVGPU_PD_CACHE_COUNT		8U
#else
#error "Unsupported page size."
#endif

#else
#define NVGPU_PD_CACHE_COUNT		8U
#endif

#define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
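/*
 * For example: with a 4K PAGE_SIZE, NVGPU_PD_CACHE_COUNT is 4U, so
 * NVGPU_PD_CACHE_SIZE works out to 256 * (1 << 4) = 4096 bytes -- exactly
 * one page. With a 64K PAGE_SIZE (or outside __KERNEL__), the count is 8U
 * and the cache size is 256 * (1 << 8) = 65536 bytes.
 */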

struct nvgpu_pd_mem_entry {
	struct nvgpu_mem mem;

	/*
	 * Size of the page directories (not the mem). alloc_map is a bitmap
	 * showing which PDs have been allocated.
	 *
	 * The size of mem will be NVGPU_PD_CACHE_SIZE and pd_size will
	 * always be a power of 2.
	 */
	u32 pd_size;
	DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);

	/* Total number of allocations in this PD. */
	u32 allocs;

	struct nvgpu_list_node list_entry;
	struct nvgpu_rbtree_node tree_entry;
};

static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, list_entry));
}

static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, tree_entry));
}
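/*
 * Illustrative sketch (not part of the original header): these helpers
 * recover the containing nvgpu_pd_mem_entry from its embedded list or
 * rbtree node. For example, given a node pulled off one of the pd_cache
 * lists (the variable names below are hypothetical):
 *
 *	struct nvgpu_list_node *node = ...; // e.g. from a partial list
 *	struct nvgpu_pd_mem_entry *entry =
 *		nvgpu_pd_mem_entry_from_list_entry(node);
 */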

/*
 * A cache for allocating PD memory from. This enables smaller PDs to be packed
 * into single pages.
 *
 * This is fairly complex so see the documentation in pd_cache.c for a full
 * description of how this is organized.
 */
struct nvgpu_pd_cache {
	/*
	 * Array of lists of full nvgpu_pd_mem_entries and partially full (or
	 * empty) nvgpu_pd_mem_entries.
	 */
	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];

	/*
	 * Tree of all allocated struct nvgpu_mem's for fast look up.
	 */
	struct nvgpu_rbtree_node *mem_tree;

	/*
	 * All access to the cache must be locked. This protects the lists and
	 * the rb tree.
	 */
	struct nvgpu_mutex lock;
};

/*
 * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
 * in the GMMU.
 */
struct nvgpu_gmmu_pd {
	/*
	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
	 * offset of the PDE table in @mem. @cached specifies if this PD is
	 * using pd_cache memory.
	 */
	struct nvgpu_mem *mem;
	u32 mem_offs;
	bool cached;

	/*
	 * List of pointers to the next level of page tables. Does not
	 * need to be populated when this PD is pointing to PTEs.
	 */
	struct nvgpu_gmmu_pd *entries;
	int num_entries;
};

/*
 * Reduce the number of arguments getting passed through the various levels of
 * GMMU mapping functions.
 *
 * The following fields are set statically and do not change throughout the
 * mapping call:
 *
 *   pgsz:            Index into the page size table.
 *   kind_v:          Kind attributes for mapping.
 *   cacheable:       Cacheability of the mapping.
 *   rw_flag:         Flag from enum gk20a_mem_rw_flag.
 *   sparse:          Set if the mapping should be sparse.
 *   priv:            Privileged mapping.
 *   coherent:        Set if the mapping should be IO coherent.
 *   valid:           Set if the PTE should be marked valid.
 *   aperture:        VIDMEM or SYSMEM.
 *   debug:           When set print debugging info.
 *   platform_atomic: True if platform_atomic flag is valid.
 *
 * These fields are dynamically updated as necessary during the map:
 *
 *   ctag:            Comptag line in the comptag cache;
 *                    updated every time we write a PTE.
 */
struct nvgpu_gmmu_attrs {
	u32 pgsz;
	u32 kind_v;
	u64 ctag;
	bool cacheable;
	enum gk20a_mem_rw_flag rw_flag;
	bool sparse;
	bool priv;
	bool valid;
	enum nvgpu_aperture aperture;
	bool debug;
	bool l3_alloc;
	bool platform_atomic;
};
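/*
 * Illustrative sketch (not from the original header): a plausible attrs
 * block for a small-page, cacheable, read/write SYSMEM mapping. In practice
 * this structure is filled in by the GMMU mapping code itself; the values
 * below, and the APERTURE_SYSMEM enumerator (assumed from nvgpu_mem.h), are
 * shown only as an example.
 *
 *	struct nvgpu_gmmu_attrs attrs = {
 *		.pgsz      = GMMU_PAGE_SIZE_SMALL,
 *		.kind_v    = 0U,
 *		.cacheable = true,
 *		.rw_flag   = gk20a_mem_flag_none,
 *		.aperture  = APERTURE_SYSMEM,
 *		.valid     = true,
 *	};
 */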

struct gk20a_mmu_level {
	int hi_bit[2];
	int lo_bit[2];

	/*
	 * Build map from virt_addr -> phys_addr.
	 */
	void (*update_entry)(struct vm_gk20a *vm,
			     const struct gk20a_mmu_level *l,
			     struct nvgpu_gmmu_pd *pd,
			     u32 pd_idx,
			     u64 phys_addr,
			     u64 virt_addr,
			     struct nvgpu_gmmu_attrs *attrs);
	u32 entry_size;
	/*
	 * Get PDE page size.
	 */
	u32 (*get_pgsz)(struct gk20a *g, const struct gk20a_mmu_level *l,
			struct nvgpu_gmmu_pd *pd, u32 pd_idx);
};
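/*
 * Illustrative sketch (hypothetical, not taken from any real chip): a chip
 * describes its page table layout as an array of these levels, ordered from
 * the top-level PDE down to the PTE. The bit ranges, entry sizes and callback
 * names below are invented purely to show the shape of such a table; the two
 * slots of hi_bit/lo_bit presumably correspond to the two page-size indices.
 *
 *	static const struct gk20a_mmu_level example_mmu_levels[] = {
 *		{ .hi_bit = {37, 37}, .lo_bit = {28, 28},
 *		  .update_entry = example_update_pde, .entry_size = 8 },
 *		{ .hi_bit = {27, 27}, .lo_bit = {12, 16},
 *		  .update_entry = example_update_pte, .entry_size = 8 },
 *		{ .update_entry = NULL },
 *	};
 */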

static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
{
	switch (p) {
	case gk20a_mem_flag_none:
		return "RW";
	case gk20a_mem_flag_write_only:
		return "WO";
	case gk20a_mem_flag_read_only:
		return "RO";
	default:
		return "??";
	}
}

int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);

/**
 * nvgpu_gmmu_map - Map memory into the GMMU.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
		   u64 size,
		   u32 flags,
		   enum gk20a_mem_rw_flag rw_flag,
		   bool priv,
		   enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_map_fixed - Map memory into the GMMU at a fixed address.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 enum gk20a_mem_rw_flag rw_flag,
			 bool priv,
			 enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_unmap - Unmap a buffer.
 *
 * Kernel space.
 */
void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
		      u64 gpu_va);
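/*
 * Illustrative sketch (not from the original header): a typical kernel-space
 * map/unmap round trip. The buffer setup is elided; the flags value, the
 * APERTURE_SYSMEM enumerator (assumed from nvgpu_mem.h) and the error
 * handling are examples only.
 *
 *	struct nvgpu_mem *mem = ...; // an already-allocated buffer
 *	u64 size = ...;              // size of the region to map, in bytes
 *	u64 gpu_va;
 *
 *	gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
 *				gk20a_mem_flag_none, false,
 *				APERTURE_SYSMEM);
 *	if (gpu_va == 0ULL)
 *		return -ENOMEM; // map failed
 *
 *	// ... use the mapping ...
 *
 *	nvgpu_gmmu_unmap(vm, mem, gpu_va);
 */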

int nvgpu_pd_alloc(struct vm_gk20a *vm,
		   struct nvgpu_gmmu_pd *pd,
		   u32 bytes);

void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				struct nvgpu_gmmu_pd *pd, u32 bytes);
void nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_init(struct gk20a *g);
void nvgpu_pd_cache_fini(struct gk20a *g);
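/*
 * Illustrative sketch (not from the original header): allocating and freeing
 * a page directory through the pd_cache. The size passed in is just an
 * example; real callers typically size the PD from the MMU level
 * description (number of entries times entry_size).
 *
 *	struct nvgpu_gmmu_pd pd = { };
 *	int err;
 *
 *	err = nvgpu_pd_alloc(vm, &pd, 4096U);
 *	if (err != 0)
 *		return err;
 *
 *	// ... fill in PDEs/PTEs, e.g. via pd_write() ...
 *
 *	nvgpu_pd_free(vm, &pd);
 */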

/*
 * Some useful routines that are shared across chips.
 */
static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
				       u32 pd_idx)
{
	return (pd_idx * l->entry_size) / sizeof(u32);
}

static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
			    size_t w, size_t data)
{
	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}
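/*
 * Illustrative sketch (not from the original header): an update_entry-style
 * callback would typically combine the two helpers above -- convert a PD
 * index into a word offset, then write each 32-bit word of the entry. The
 * pde_w[] contents are hypothetical; the real values are chip specific.
 *
 *	u32 pd_offset = pd_offset_from_index(l, pd_idx);
 *	u32 pde_w[2] = { 0U, 0U }; // hardware-specific PDE words
 *
 *	pd_write(g, pd, pd_offset + 0, pde_w[0]);
 *	pd_write(g, pd, pd_offset + 1, pde_w[1]);
 */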

/**
 * __nvgpu_pte_words - Compute number of words in a PTE.
 *
 * @g - The GPU.
 *
 * This computes and returns the size of a PTE for the passed chip.
 */
u32 __nvgpu_pte_words(struct gk20a *g);

/**
 * __nvgpu_get_pte - Get the contents of a PTE by virtual address
 *
 * @g     - The GPU.
 * @vm    - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte   - [out] Set to the contents of the PTE.
 *
 * Find a PTE in the passed VM based on the passed GPU virtual address. This
 * will fill @pte with a copy of the contents of the PTE. @pte must be an
 * array of u32s large enough to contain the PTE. This size can be computed
 * using __nvgpu_pte_words().
 *
 * If you wish to write to this PTE then you may modify @pte and then use
 * __nvgpu_set_pte().
 *
 * This function returns 0 if the PTE is found and -EINVAL otherwise.
 */
int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);

/**
 * __nvgpu_set_pte - Set a PTE based on virtual address
 *
 * @g     - The GPU.
 * @vm    - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte   - The contents of the PTE to write.
 *
 * Find a PTE and overwrite its contents with the passed in data located in
 * @pte. If the PTE does not exist then no writing will happen; that is, this
 * function will not fill out the page tables for you. The expectation is that
 * the passed @vaddr has already been mapped and this is just modifying the
 * mapping (for instance changing invalid to valid).
 *
 * @pte must contain at least the required words for the PTE. See
 * __nvgpu_pte_words().
 *
 * This function returns 0 on success and -EINVAL otherwise.
 */
int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
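/*
 * Illustrative sketch (not from the original header): a read-modify-write of
 * a PTE using the three calls above. The bit being flipped is hypothetical;
 * the real PTE layout is chip specific.
 *
 *	u32 pte[8]; // assumed big enough here; __nvgpu_pte_words() gives the real count
 *	int err;
 *
 *	err = __nvgpu_get_pte(g, vm, vaddr, pte);
 *	if (err != 0)
 *		return err; // no PTE mapped at vaddr
 *
 *	pte[0] |= 0x1U; // e.g. set a valid bit (hypothetical position)
 *
 *	err = __nvgpu_set_pte(g, vm, vaddr, pte);
 */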

/*
 * Internal debugging routines. Probably not something you want to use.
 */
#define pte_dbg(g, attrs, fmt, args...)				\
	do {							\
		if ((attrs != NULL) && (attrs->debug))		\
			nvgpu_info(g, fmt, ##args);		\
		else						\
			nvgpu_log(g, gpu_dbg_pte, fmt, ##args);	\
	} while (0)

#endif /* NVGPU_GMMU_H */