Diffstat (limited to 'include/nvgpu/gmmu.h')
-rw-r--r--  include/nvgpu/gmmu.h  369
1 file changed, 369 insertions(+), 0 deletions(-)
diff --git a/include/nvgpu/gmmu.h b/include/nvgpu/gmmu.h
new file mode 100644
index 0000000..2fc0d44
--- /dev/null
+++ b/include/nvgpu/gmmu.h
@@ -0,0 +1,369 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GMMU_H
#define NVGPU_GMMU_H

#include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
#include <nvgpu/bitops.h>
#include <nvgpu/mm.h>

/*
 * This is the GMMU API visible to blocks outside of the GMMU. This API
 * supports all of the different types of mappings that might be done in the
 * GMMU.
 */

struct vm_gk20a;
struct nvgpu_mem;

#define GMMU_PAGE_SIZE_SMALL	0U
#define GMMU_PAGE_SIZE_BIG	1U
#define GMMU_PAGE_SIZE_KERNEL	2U
#define GMMU_NR_PAGE_SIZES	3U

enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,	/* RW */
	gk20a_mem_flag_read_only = 1,	/* RO */
	gk20a_mem_flag_write_only = 2,	/* WO */
};

/*
 * Minimum size of a cache. The number of different caches in the
 * nvgpu_pd_cache structure depends on this. The MIN_SHIFT define is the
 * number of bits to shift to determine which list to use in the array of
 * lists.
 *
 * For Linux, limit the use of the cache to entries smaller than the page
 * size, to avoid potential problems with running out of CMA memory when
 * allocating large, contiguous slabs, as would be required for
 * non-IOMMU-able chips.
 */
#define NVGPU_PD_CACHE_MIN		256U
#define NVGPU_PD_CACHE_MIN_SHIFT	9U

#ifdef __KERNEL__

#if PAGE_SIZE == 4096
#define NVGPU_PD_CACHE_COUNT		4U
#elif PAGE_SIZE == 65536
#define NVGPU_PD_CACHE_COUNT		8U
#else
#error "Unsupported page size."
#endif

#else
#define NVGPU_PD_CACHE_COUNT		8U
#endif

#define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))

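/*
 * Illustrative sketch (not part of this header): one way a PD allocation size
 * could be turned into an index into the full[]/partial[] list arrays of
 * nvgpu_pd_cache below. The helper name is hypothetical and ilog2() is
 * assumed from the kernel; the real selection logic lives in pd_cache.c.
 *
 *	static u32 example_pd_cache_list_index(u32 pd_size)
 *	{
 *		// 256B -> 0, 512B -> 1, 1KB -> 2, ... up to
 *		// NVGPU_PD_CACHE_COUNT - 1 for the largest cached size.
 *		return (u32)ilog2(pd_size >> (NVGPU_PD_CACHE_MIN_SHIFT - 1U));
 *	}
 */
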
struct nvgpu_pd_mem_entry {
	struct nvgpu_mem mem;

	/*
	 * Size of the page directories (not the mem). alloc_map is a bitmap
	 * showing which PDs have been allocated.
	 *
	 * The size of mem will be NVGPU_PD_CACHE_SIZE and pd_size will always
	 * be a power of 2.
	 */
	u32 pd_size;
	DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);

	/* Total number of allocations in this PD. */
	u32 allocs;

	struct nvgpu_list_node list_entry;
	struct nvgpu_rbtree_node tree_entry;
};

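/*
 * Illustrative sketch (not part of this header): how a byte offset into an
 * entry's mem could be related to a bit in alloc_map above, assuming bit i
 * covers the pd_size-sized chunk starting at byte i * pd_size.
 *
 *	static u32 example_pd_bit_from_offset(struct nvgpu_pd_mem_entry *e,
 *					      u32 byte_offset)
 *	{
 *		return byte_offset / e->pd_size;
 *	}
 */
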
static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, list_entry));
};

static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, tree_entry));
};

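/*
 * Example (illustrative only): recovering the containing nvgpu_pd_mem_entry
 * from a node pulled off one of the cache lists or looked up in the rbtree.
 *
 *	struct nvgpu_list_node *node = ...;	// e.g. from cache->partial[i]
 *	struct nvgpu_pd_mem_entry *entry =
 *		nvgpu_pd_mem_entry_from_list_entry(node);
 */
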
/*
 * A cache for allocating PD memory from. This enables smaller PDs to be packed
 * into single pages.
 *
 * This is fairly complex so see the documentation in pd_cache.c for a full
 * description of how this is organized.
 */
struct nvgpu_pd_cache {
	/*
	 * Array of lists of full nvgpu_pd_mem_entries and partially full (or
	 * empty) nvgpu_pd_mem_entries.
	 */
	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];

	/*
	 * Tree of all allocated struct nvgpu_mem's for fast look up.
	 */
	struct nvgpu_rbtree_node *mem_tree;

	/*
	 * All access to the cache must be locked. This protects the lists and
	 * the rb tree.
	 */
	struct nvgpu_mutex lock;
};

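/*
 * Illustrative sketch: any walk of the full[]/partial[] lists or the rbtree
 * above is expected to happen with the lock held, e.g.
 *
 *	nvgpu_mutex_acquire(&cache->lock);
 *	// ... look up or update list/tree state ...
 *	nvgpu_mutex_release(&cache->lock);
 *
 * nvgpu_mutex_acquire()/nvgpu_mutex_release() are assumed from nvgpu/lock.h.
 */
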
/*
 * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
 * in the GMMU.
 */
struct nvgpu_gmmu_pd {
	/*
	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
	 * offset of the PDE table in @mem. @cached specifies if this PD is
	 * using pd_cache memory.
	 */
	struct nvgpu_mem *mem;
	u32 mem_offs;
	bool cached;

	/*
	 * List of pointers to the next level of page tables. Does not
	 * need to be populated when this PD is pointing to PTEs.
	 */
	struct nvgpu_gmmu_pd *entries;
	int num_entries;
};

/*
 * Reduce the number of arguments getting passed through the various levels of
 * GMMU mapping functions.
 *
 * The following fields are set statically and do not change throughout the
 * mapping call:
 *
 *   pgsz:            Index into the page size table.
 *   kind_v:          Kind attributes for mapping.
 *   cacheable:       Cacheability of the mapping.
 *   rw_flag:         Flag from enum gk20a_mem_rw_flag.
 *   sparse:          Set if the mapping should be sparse.
 *   priv:            Privileged mapping.
 *   coherent:        Set if the mapping should be IO coherent.
 *   valid:           Set if the PTE should be marked valid.
 *   aperture:        VIDMEM or SYSMEM.
 *   debug:           When set, print debugging info.
 *   platform_atomic: True if the platform_atomic flag is valid.
 *
 * These fields are dynamically updated as necessary during the map:
 *
 *   ctag:            Comptag line in the comptag cache;
 *                    updated every time we write a PTE.
 */
struct nvgpu_gmmu_attrs {
	u32 pgsz;
	u32 kind_v;
	u64 ctag;
	bool cacheable;
	enum gk20a_mem_rw_flag rw_flag;
	bool sparse;
	bool priv;
	bool valid;
	enum nvgpu_aperture aperture;
	bool debug;
	bool l3_alloc;
	bool platform_atomic;
};

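/*
 * Illustrative example (values made up, and APERTURE_SYSMEM is assumed to
 * come from nvgpu_mem.h): a small-page, cacheable, read/write SYSMEM mapping
 * might be described by attrs such as
 *
 *	struct nvgpu_gmmu_attrs attrs = {
 *		.pgsz      = GMMU_PAGE_SIZE_SMALL,
 *		.kind_v    = 0U,
 *		.cacheable = true,
 *		.rw_flag   = gk20a_mem_flag_none,	// RW
 *		.aperture  = APERTURE_SYSMEM,
 *		.valid     = true,
 *	};
 *
 * In practice this struct is filled out internally by the GMMU mapping code
 * rather than by callers of nvgpu_gmmu_map().
 */
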
struct gk20a_mmu_level {
	int hi_bit[2];
	int lo_bit[2];

	/*
	 * Build map from virt_addr -> phys_addr.
	 */
	void (*update_entry)(struct vm_gk20a *vm,
			     const struct gk20a_mmu_level *l,
			     struct nvgpu_gmmu_pd *pd,
			     u32 pd_idx,
			     u64 phys_addr,
			     u64 virt_addr,
			     struct nvgpu_gmmu_attrs *attrs);
	u32 entry_size;
	/*
	 * Get PDE page size.
	 */
	u32 (*get_pgsz)(struct gk20a *g, const struct gk20a_mmu_level *l,
			struct nvgpu_gmmu_pd *pd, u32 pd_idx);
};

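/*
 * Illustrative sketch (the real page-table walk lives in the GMMU mapping
 * code, not in this header): hi_bit/lo_bit describe, per page-size index,
 * which virtual address bits select an entry at this level, so a PD index
 * can be derived roughly as
 *
 *	static u32 example_pd_index(const struct gk20a_mmu_level *l,
 *				    u64 virt_addr, u32 pgsz)
 *	{
 *		u32 bits = (u32)(l->hi_bit[pgsz] - l->lo_bit[pgsz] + 1);
 *		u64 mask = (1ULL << bits) - 1ULL;
 *
 *		return (u32)((virt_addr >> l->lo_bit[pgsz]) & mask);
 *	}
 */
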
static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
{
	switch (p) {
	case gk20a_mem_flag_none:
		return "RW";
	case gk20a_mem_flag_write_only:
		return "WO";
	case gk20a_mem_flag_read_only:
		return "RO";
	default:
		return "??";
	}
}

int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);

/**
 * nvgpu_gmmu_map - Map memory into the GMMU.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
		   u64 size,
		   u32 flags,
		   enum gk20a_mem_rw_flag rw_flag,
		   bool priv,
		   enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_map_fixed - Map memory into the GMMU at a fixed GPU virtual
 *                        address.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 enum gk20a_mem_rw_flag rw_flag,
			 bool priv,
			 enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_unmap - Unmap a buffer.
 *
 * Kernel space.
 */
void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
		      u64 gpu_va);

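/*
 * Example usage (illustrative only; the flags value and APERTURE_SYSMEM are
 * assumptions, mem is DMA memory the caller already allocated, and size is
 * the buffer size in bytes):
 *
 *	u64 gpu_va;
 *
 *	gpu_va = nvgpu_gmmu_map(vm, mem, size, 0U,
 *				gk20a_mem_flag_none, false,
 *				APERTURE_SYSMEM);
 *	if (gpu_va == 0ULL)
 *		return -ENOMEM;		// map failed
 *
 *	// ... use the mapping ...
 *
 *	nvgpu_gmmu_unmap(vm, mem, gpu_va);
 */
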
int nvgpu_pd_alloc(struct vm_gk20a *vm,
		   struct nvgpu_gmmu_pd *pd,
		   u32 bytes);

void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				struct nvgpu_gmmu_pd *pd, u32 bytes);
void nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_init(struct gk20a *g);
void nvgpu_pd_cache_fini(struct gk20a *g);

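/*
 * Illustrative pairing of the PD allocation calls above (pd_bytes is a
 * placeholder for whatever the chip's mmu_level entry_size and entry count
 * dictate):
 *
 *	struct nvgpu_gmmu_pd pd = { };
 *	int err;
 *
 *	err = nvgpu_pd_alloc(vm, &pd, pd_bytes);
 *	if (err != 0)
 *		return err;
 *
 *	// ... program entries, e.g. via pd_write() below ...
 *
 *	nvgpu_pd_free(vm, &pd);
 */
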
/*
 * Some useful routines that are shared across chips.
 */
static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
				       u32 pd_idx)
{
	return (pd_idx * l->entry_size) / sizeof(u32);
}

static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
			    size_t w, size_t data)
{
	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}

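/*
 * Illustrative example: writing the words of a single PDE/PTE with the two
 * helpers above. The entry values are placeholders, not real HW fields.
 *
 *	u32 w = pd_offset_from_index(l, pd_idx);
 *
 *	pd_write(g, pd, w + 0U, 0x00000001U);	// low word (placeholder)
 *	pd_write(g, pd, w + 1U, 0x00000000U);	// high word (placeholder)
 */
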
/**
 * __nvgpu_pte_words - Compute number of words in a PTE.
 *
 * @g - The GPU.
 *
 * This computes and returns the size, in 32-bit words, of a PTE for the
 * passed chip.
 */
u32 __nvgpu_pte_words(struct gk20a *g);

/**
 * __nvgpu_get_pte - Get the contents of a PTE by virtual address
 *
 * @g - The GPU.
 * @vm - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte - [out] Set to the contents of the PTE.
 *
 * Find a PTE in the passed VM based on the passed GPU virtual address. This
 * will fill @pte with a copy of the contents of the PTE. @pte must be an
 * array of u32s large enough to contain the PTE. The required size can be
 * computed using __nvgpu_pte_words().
 *
 * If you wish to write to this PTE then you may modify @pte and then use
 * __nvgpu_set_pte().
 *
 * This function returns 0 if the PTE is found and -EINVAL otherwise.
 */
int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);

/**
 * __nvgpu_set_pte - Set a PTE based on virtual address
 *
 * @g - The GPU.
 * @vm - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte - The contents of the PTE to write.
 *
 * Find a PTE and overwrite its contents with the data passed in @pte. If the
 * PTE does not exist then no write happens; that is, this function will not
 * fill out the page tables for you. The expectation is that the passed @vaddr
 * has already been mapped and this is just modifying the mapping (for
 * instance, changing invalid to valid).
 *
 * @pte must contain at least the required words for the PTE. See
 * __nvgpu_pte_words().
 *
 * This function returns 0 on success and -EINVAL otherwise.
 */
int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);

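/*
 * Illustrative read-modify-write of a PTE using the two calls above. Which
 * bits to change is chip specific, so the "modify" step is left abstract,
 * and the fixed array size is an assumption large enough for current chips.
 *
 *	u32 words = __nvgpu_pte_words(g);
 *	u32 pte[8];
 *	int err;
 *
 *	err = __nvgpu_get_pte(g, vm, vaddr, pte);
 *	if (err != 0)
 *		return err;
 *
 *	// ... modify pte[0] .. pte[words - 1] as needed ...
 *
 *	err = __nvgpu_set_pte(g, vm, vaddr, pte);
 */
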
/*
 * Internal debugging routines. Probably not something you want to use.
 */
#define pte_dbg(g, attrs, fmt, args...) \
	do { \
		if ((attrs != NULL) && (attrs->debug)) \
			nvgpu_info(g, fmt, ##args); \
		else \
			nvgpu_log(g, gpu_dbg_pte, fmt, ##args); \
	} while (0)

#endif /* NVGPU_GMMU_H */