path: root/drivers/gpu/nvgpu/include
author    Alex Waterman <alexw@nvidia.com>	2017-06-09 14:42:50 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-06 17:44:16 -0400
commit    583704620db88e391f6b14acc57af859a70127de (patch)
tree      8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/include
parent    c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory. For
example, on Pascal, the final PD level for large pages is only 256 bytes, so
16 PDs can fit in a single page. Allocating an entire page for each of these
256 B PDs is extremely wasteful.

This patch alleviates the DMA memory wasted on small PDs by packing multiple
small PDs into a single page. The packing is implemented as a slab allocator:
each page is a slab, and multiple PD instances can be allocated from each
page.

Several modifications to the nvgpu_gmmu_pd struct were also needed to support
this. The nvgpu_mem is now a pointer, and there is an explicit offset into the
nvgpu_mem struct so that each nvgpu_gmmu_pd knows what portion of the memory
it is using. The nvgpu_pde_phys_addr() and pd_write() functions also required
changes, since the PD is no longer always situated at the start of the
nvgpu_mem.

Initialization and cleanup of the page tables for each VM was slightly
modified to work through the new pd_cache implementation.

Some PDs (e.g. the PDB), despite not being a full page, still require a full
page for alignment purposes (HW requirements). Thus a direct allocation method
for PDs is still provided. This is also used when a PD that could in principle
be cached is larger than a page.

Lastly, a new debug flag was added for the pd_cache code.

JIRA NVGPU-30

Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
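To picture the packing scheme described above, here is a minimal, stand-alone C sketch (all names are hypothetical; only the 256 B PD size and the one-page slab come from the commit message). Each slab page holds PAGE_SIZE / pd_size slots, and a bitmap records which slots have been handed out:

/*
 * Illustrative sketch only -- not the nvgpu implementation. It shows the
 * slab-style packing idea: a 4 KiB page carved into power-of-two PD slots,
 * with a bitmap tracking which slots are in use.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE    4096u
#define PD_CACHE_MIN  256u	/* smallest PD size the cache would serve */

/* One slab page: which of its fixed-size slots are allocated. */
struct pd_slab {
	uint32_t pd_size;	/* power of two, >= PD_CACHE_MIN */
	uint32_t alloc_map;	/* bit n set => slot n in use */
};

/* Allocate one PD slot; returns byte offset into the page, or -1 if full. */
static int pd_slab_alloc(struct pd_slab *s)
{
	uint32_t slots = PAGE_SIZE / s->pd_size;

	for (uint32_t i = 0; i < slots; i++) {
		if (!(s->alloc_map & (1u << i))) {
			s->alloc_map |= 1u << i;
			return (int)(i * s->pd_size);
		}
	}
	return -1;
}

int main(void)
{
	struct pd_slab slab = { .pd_size = PD_CACHE_MIN, .alloc_map = 0 };

	/* 4096 / 256 = 16 PDs fit in one page, as the commit message notes. */
	for (int n = 0; n < 3; n++)
		printf("PD %d -> offset %d\n", n, pd_slab_alloc(&slab));
	return 0;
}

Running the sketch prints offsets 0, 256 and 512, i.e. three small PDs packed into a single page instead of three separate pages.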
Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h  91
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/log.h    1
2 files changed, 86 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 28a2cb82..eff87c31 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -19,6 +19,9 @@
 
 #include <nvgpu/types.h>
 #include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/list.h>
+#include <nvgpu/rbtree.h>
+#include <nvgpu/lock.h>
 
 struct scatterlist;
 
@@ -45,14 +48,85 @@ enum gk20a_mem_rw_flag {
 };
 
 /*
+ * Minimum size of a cache. The number of different caches in the nvgpu_pd_cache
+ * structure is of course depending on this. The MIN_SHIFT define is the right
+ * number of bits to shift to determine which list to use in the array of lists.
+ */
+#define NVGPU_PD_CACHE_MIN		256
+#define NVGPU_PD_CACHE_MIN_SHIFT	9
+#define NVGPU_PD_CACHE_COUNT		4
+
+struct nvgpu_pd_mem_entry {
+	struct nvgpu_mem mem;
+
+	/*
+	 * Size of the page directories (not the mem). bmap is a bitmap showing
+	 * which PDs have been allocated. The size of mem will always be one
+	 * page. pd_size will always be a power of 2.
+	 */
+	u32 pd_size;
+	unsigned long alloc_map;
+
+	struct nvgpu_list_node list_entry;
+	struct nvgpu_rbtree_node tree_entry;
+};
+
+static inline struct nvgpu_pd_mem_entry *
+nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node)
+{
+	return (struct nvgpu_pd_mem_entry *)
+		((uintptr_t)node -
+		 offsetof(struct nvgpu_pd_mem_entry, list_entry));
+};
+
+static inline struct nvgpu_pd_mem_entry *
+nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node)
+{
+	return (struct nvgpu_pd_mem_entry *)
+		((uintptr_t)node -
+		 offsetof(struct nvgpu_pd_mem_entry, tree_entry));
+};
+
+/*
+ * A cache for allocating PD memory from. This enables smaller PDs to be packed
+ * into single pages.
+ *
+ * This is fairly complex so see the documentation in pd_cache.c for a full
+ * description of how this is organized.
+ */
+struct nvgpu_pd_cache {
+	/*
+	 * Array of lists of full nvgpu_pd_mem_entries and partially full (or
+	 * empty) nvgpu_pd_mem_entries.
+	 */
+	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
+	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
+
+	/*
+	 * Tree of all allocated struct nvgpu_mem's for fast look up.
+	 */
+	struct nvgpu_rbtree_node *mem_tree;
+
+	/*
+	 * All access to the cache much be locked. This protects the lists and
+	 * the rb tree.
+	 */
+	struct nvgpu_mutex lock;
+};
+
+/*
  * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
  * in the GMMU.
  */
 struct nvgpu_gmmu_pd {
 	/*
-	 * DMA memory describing the PTEs or PTEs.
+	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
+	 * offset of the PDE table in @mem. @cached specifies if this PD is
+	 * using pd_cache memory.
 	 */
-	struct nvgpu_mem mem;
+	struct nvgpu_mem *mem;
+	u32 mem_offs;
+	bool cached;
 
 	/*
 	 * List of pointers to the next level of page tables. Does not
@@ -66,7 +140,7 @@ struct nvgpu_gmmu_pd {
  * Reduce the number of arguments getting passed through the various levels of
  * GMMU mapping functions.
  *
- * The following fields are set statically and do not change throughout
+ * The following fields are set statically and do not change throughout the
  * mapping call:
  *
  * pgsz: Index into the page size table.
@@ -166,8 +240,13 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
 			struct nvgpu_mem *mem,
 			u64 gpu_va);
 
-void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-			   struct nvgpu_gmmu_pd *entry);
+int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
+void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
+int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
+				  struct nvgpu_gmmu_pd *pd, u32 bytes);
+void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
+int nvgpu_pd_cache_init(struct gk20a *g);
+void nvgpu_pd_cache_fini(struct gk20a *g);
 
 /*
  * Some useful routines that are shared across chips.
@@ -181,7 +260,7 @@ static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
 static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
 			    size_t w, size_t data)
 {
-	nvgpu_mem_wr32(g, &pd->mem, w, data);
+	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
 }
 
 
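The pd_write() change above is pure offset arithmetic: a packed PD no longer starts at word 0 of its backing nvgpu_mem, so word w of the PD lives at word (mem_offs / 4) + w of the shared page. A small, stand-alone sketch of that arithmetic (stand-in types, not the nvgpu structs):

/*
 * Sketch of the offset arithmetic performed by the updated pd_write() --
 * illustrative only; fake_pd and fake_pd_write() are hypothetical stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_pd {
	uint32_t *mem;		/* backing page, shared by several PDs */
	uint32_t  mem_offs;	/* byte offset of this PD within the page */
};

static void fake_pd_write(struct fake_pd *pd, size_t w, uint32_t data)
{
	/* Word w of the PD = word (mem_offs / 4) + w of the page. */
	pd->mem[(pd->mem_offs / sizeof(uint32_t)) + w] = data;
}

int main(void)
{
	static uint32_t page[4096 / sizeof(uint32_t)];
	/* Third 256 B PD in the page: byte offset 2 * 256 = 512. */
	struct fake_pd pd = { .mem = page, .mem_offs = 512 };

	fake_pd_write(&pd, 0, 0xdeadbeef);
	printf("word index written: %zu\n", (size_t)(512 / 4));	/* 128 */
	return 0;
}

The direct-allocation path (__nvgpu_pd_cache_alloc_direct) presumably leaves mem_offs at zero, so the previous full-page behaviour falls out as the offset-zero case.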
diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h
index 3b8e6b19..a1110a59 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/log.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/log.h
@@ -68,6 +68,7 @@ enum nvgpu_log_categories {
 	gpu_dbg_xv       = BIT(17), /* XVE debugging. */
 	gpu_dbg_shutdown = BIT(18), /* GPU shutdown tracing. */
 	gpu_dbg_kmem     = BIT(19), /* Kmem tracking debugging. */
+	gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */
 	gpu_dbg_mem      = BIT(31), /* memory accesses; very verbose. */
 };
 
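The new gpu_dbg_pd_cache bit gates pd_cache traces the same way the other categories do: a message is emitted only when its bit is set in the enabled mask. A stand-alone sketch of that gating follows (BIT(), the mask and log_dbg() below are stand-ins for the driver's own nvgpu_log() machinery, not the nvgpu API):

/*
 * Illustrative sketch of bitmask-gated debug categories. Everything here is
 * hypothetical except the bit positions, which mirror the enum above.
 */
#include <stdint.h>
#include <stdio.h>

#define BIT(n)            (1u << (n))
#define GPU_DBG_PD_CACHE  BIT(20)	/* mirrors gpu_dbg_pd_cache */
#define GPU_DBG_MEM       BIT(31)	/* mirrors gpu_dbg_mem */

static uint32_t log_mask = GPU_DBG_PD_CACHE;	/* only PD cache traces on */

#define log_dbg(category, fmt, ...) \
	do { \
		if (log_mask & (category)) \
			printf(fmt "\n", __VA_ARGS__); \
	} while (0)

int main(void)
{
	log_dbg(GPU_DBG_PD_CACHE, "pd_cache: new slab, pd_size=%u", 256u);
	log_dbg(GPU_DBG_MEM, "suppressed: BIT(%u) not in log_mask", 31u);
	return 0;
}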