Diffstat (limited to 'include/nvgpu/gmmu.h')
-rw-r--r--	include/nvgpu/gmmu.h	369
1 file changed, 369 insertions, 0 deletions
diff --git a/include/nvgpu/gmmu.h b/include/nvgpu/gmmu.h
new file mode 100644
index 0000000..2fc0d44
--- /dev/null
+++ b/include/nvgpu/gmmu.h
@@ -0,0 +1,369 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GMMU_H
#define NVGPU_GMMU_H

#include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
#include <nvgpu/bitops.h>
#include <nvgpu/mm.h>

/*
 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
 * API supports all the different types of mappings that might be done in the
 * GMMU.
 */

struct vm_gk20a;
struct nvgpu_mem;

#define GMMU_PAGE_SIZE_SMALL	0U
#define GMMU_PAGE_SIZE_BIG	1U
#define GMMU_PAGE_SIZE_KERNEL	2U
#define GMMU_NR_PAGE_SIZES	3U

enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,	/* RW */
	gk20a_mem_flag_read_only = 1,	/* RO */
	gk20a_mem_flag_write_only = 2,	/* WO */
};

/*
 * Minimum size of a cache. The number of different caches in the
 * nvgpu_pd_cache structure depends on this. The MIN_SHIFT define is the
 * number of bits to shift to determine which list to use in the array of
 * lists.
 *
 * For Linux, limit the use of the cache to entries smaller than the page
 * size, to avoid potential problems with running out of CMA memory when
 * allocating large, contiguous slabs, as would be required for
 * non-IOMMU-able chips.
 */
#define NVGPU_PD_CACHE_MIN		256U
#define NVGPU_PD_CACHE_MIN_SHIFT	9U

#ifdef __KERNEL__

#if PAGE_SIZE == 4096
#define NVGPU_PD_CACHE_COUNT		4U
#elif PAGE_SIZE == 65536
#define NVGPU_PD_CACHE_COUNT		8U
#else
#error "Unsupported page size."
#endif

#else
#define NVGPU_PD_CACHE_COUNT		8U
#endif

#define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
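/*
 * For example: with a 4K PAGE_SIZE, NVGPU_PD_CACHE_COUNT is 4U, so
 * NVGPU_PD_CACHE_SIZE works out to 256 * (1 << 4) = 4096 bytes -- exactly
 * one page. With a 64K PAGE_SIZE (or outside __KERNEL__), the count is 8U
 * and the cache size is 256 * (1 << 8) = 65536 bytes.
 */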

struct nvgpu_pd_mem_entry {
	struct nvgpu_mem mem;

	/*
	 * Size of the page directories (not the mem). alloc_map is a bitmap
	 * showing which PDs have been allocated.
	 *
	 * The size of mem will be NVGPU_PD_CACHE_SIZE and pd_size will
	 * always be a power of 2.
	 */
	u32 pd_size;
	DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);

	/* Total number of allocations in this PD. */
	u32 allocs;

	struct nvgpu_list_node list_entry;
	struct nvgpu_rbtree_node tree_entry;
};

static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, list_entry));
}

static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node)
{
	return (struct nvgpu_pd_mem_entry *)
		((uintptr_t)node -
		 offsetof(struct nvgpu_pd_mem_entry, tree_entry));
}
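/*
 * Illustrative sketch (not part of the original header): these helpers
 * recover the containing nvgpu_pd_mem_entry from its embedded list or
 * rbtree node. For example, given a node pulled off one of the pd_cache
 * lists (the variable names below are hypothetical):
 *
 *	struct nvgpu_list_node *node = ...; // e.g. from a partial list
 *	struct nvgpu_pd_mem_entry *entry =
 *		nvgpu_pd_mem_entry_from_list_entry(node);
 */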

/*
 * A cache for allocating PD memory from. This enables smaller PDs to be packed
 * into single pages.
 *
 * This is fairly complex so see the documentation in pd_cache.c for a full
 * description of how this is organized.
 */
struct nvgpu_pd_cache {
	/*
	 * Array of lists of full nvgpu_pd_mem_entries and partially full (or
	 * empty) nvgpu_pd_mem_entries.
	 */
	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];

	/*
	 * Tree of all allocated struct nvgpu_mem's for fast look up.
	 */
	struct nvgpu_rbtree_node *mem_tree;

	/*
	 * All access to the cache must be locked. This protects the lists and
	 * the rb tree.
	 */
	struct nvgpu_mutex lock;
};

/*
 * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
 * in the GMMU.
 */
struct nvgpu_gmmu_pd {
	/*
	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
	 * offset of the PDE table in @mem. @cached specifies if this PD is
	 * using pd_cache memory.
	 */
	struct nvgpu_mem *mem;
	u32 mem_offs;
	bool cached;

	/*
	 * List of pointers to the next level of page tables. Does not
	 * need to be populated when this PD is pointing to PTEs.
	 */
	struct nvgpu_gmmu_pd *entries;
	int num_entries;
};

/*
 * Reduce the number of arguments getting passed through the various levels of
 * GMMU mapping functions.
 *
 * The following fields are set statically and do not change throughout the
 * mapping call:
 *
 *   pgsz:            Index into the page size table.
 *   kind_v:          Kind attributes for mapping.
 *   cacheable:       Cacheability of the mapping.
 *   rw_flag:         Flag from enum gk20a_mem_rw_flag.
 *   sparse:          Set if the mapping should be sparse.
 *   priv:            Privileged mapping.
 *   coherent:        Set if the mapping should be IO coherent.
 *   valid:           Set if the PTE should be marked valid.
 *   aperture:        VIDMEM or SYSMEM.
 *   debug:           When set print debugging info.
 *   platform_atomic: True if platform_atomic flag is valid.
 *
 * These fields are dynamically updated as necessary during the map:
 *
 *   ctag:            Comptag line in the comptag cache;
 *                    updated every time we write a PTE.
 */
struct nvgpu_gmmu_attrs {
	u32 pgsz;
	u32 kind_v;
	u64 ctag;
	bool cacheable;
	enum gk20a_mem_rw_flag rw_flag;
	bool sparse;
	bool priv;
	bool valid;
	enum nvgpu_aperture aperture;
	bool debug;
	bool l3_alloc;
	bool platform_atomic;
};
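/*
 * Illustrative sketch (not from the original header): a plausible attrs
 * block for a small-page, cacheable, read/write SYSMEM mapping. In practice
 * this structure is filled in by the GMMU mapping code itself; the values
 * below, and the APERTURE_SYSMEM enumerator (assumed from nvgpu_mem.h), are
 * shown only as an example.
 *
 *	struct nvgpu_gmmu_attrs attrs = {
 *		.pgsz      = GMMU_PAGE_SIZE_SMALL,
 *		.kind_v    = 0U,
 *		.cacheable = true,
 *		.rw_flag   = gk20a_mem_flag_none,
 *		.aperture  = APERTURE_SYSMEM,
 *		.valid     = true,
 *	};
 */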

struct gk20a_mmu_level {
	int hi_bit[2];
	int lo_bit[2];

	/*
	 * Build map from virt_addr -> phys_addr.
	 */
	void (*update_entry)(struct vm_gk20a *vm,
			     const struct gk20a_mmu_level *l,
			     struct nvgpu_gmmu_pd *pd,
			     u32 pd_idx,
			     u64 phys_addr,
			     u64 virt_addr,
			     struct nvgpu_gmmu_attrs *attrs);
	u32 entry_size;
	/*
	 * Get PDE page size.
	 */
	u32 (*get_pgsz)(struct gk20a *g, const struct gk20a_mmu_level *l,
			struct nvgpu_gmmu_pd *pd, u32 pd_idx);
};
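/*
 * Illustrative sketch (hypothetical, not taken from any real chip): a chip
 * describes its page table layout as an array of these levels, ordered from
 * the top-level PDE down to the PTE. The bit ranges, entry sizes and callback
 * names below are invented purely to show the shape of such a table; the two
 * slots of hi_bit/lo_bit presumably correspond to the two page-size indices.
 *
 *	static const struct gk20a_mmu_level example_mmu_levels[] = {
 *		{ .hi_bit = {37, 37}, .lo_bit = {28, 28},
 *		  .update_entry = example_update_pde, .entry_size = 8 },
 *		{ .hi_bit = {27, 27}, .lo_bit = {12, 16},
 *		  .update_entry = example_update_pte, .entry_size = 8 },
 *		{ .update_entry = NULL },
 *	};
 */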

static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
{
	switch (p) {
	case gk20a_mem_flag_none:
		return "RW";
	case gk20a_mem_flag_write_only:
		return "WO";
	case gk20a_mem_flag_read_only:
		return "RO";
	default:
		return "??";
	}
}

int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);

/**
 * nvgpu_gmmu_map - Map memory into the GMMU.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
		   u64 size,
		   u32 flags,
		   enum gk20a_mem_rw_flag rw_flag,
		   bool priv,
		   enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_map_fixed - Map memory into the GMMU at a fixed address.
 *
 * Kernel space.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 enum gk20a_mem_rw_flag rw_flag,
			 bool priv,
			 enum nvgpu_aperture aperture);

/**
 * nvgpu_gmmu_unmap - Unmap a buffer.
 *
 * Kernel space.
 */
void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
		      u64 gpu_va);
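/*
 * Illustrative sketch (not from the original header): a typical kernel-space
 * map/unmap round trip. The buffer setup is elided; the flags value, the
 * APERTURE_SYSMEM enumerator (assumed from nvgpu_mem.h) and the error
 * handling are examples only.
 *
 *	struct nvgpu_mem *mem = ...; // an already-allocated buffer
 *	u64 size = ...;              // size of the region to map, in bytes
 *	u64 gpu_va;
 *
 *	gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
 *				gk20a_mem_flag_none, false,
 *				APERTURE_SYSMEM);
 *	if (gpu_va == 0ULL)
 *		return -ENOMEM; // map failed
 *
 *	// ... use the mapping ...
 *
 *	nvgpu_gmmu_unmap(vm, mem, gpu_va);
 */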

int nvgpu_pd_alloc(struct vm_gk20a *vm,
		   struct nvgpu_gmmu_pd *pd,
		   u32 bytes);

void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				struct nvgpu_gmmu_pd *pd, u32 bytes);
void nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_init(struct gk20a *g);
void nvgpu_pd_cache_fini(struct gk20a *g);
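/*
 * Illustrative sketch (not from the original header): allocating and freeing
 * a page directory through the pd_cache. The size passed in is just an
 * example; real callers typically size the PD from the MMU level
 * description (number of entries times entry_size).
 *
 *	struct nvgpu_gmmu_pd pd = { };
 *	int err;
 *
 *	err = nvgpu_pd_alloc(vm, &pd, 4096U);
 *	if (err != 0)
 *		return err;
 *
 *	// ... fill in PDEs/PTEs, e.g. via pd_write() ...
 *
 *	nvgpu_pd_free(vm, &pd);
 */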

/*
 * Some useful routines that are shared across chips.
 */
static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
				       u32 pd_idx)
{
	return (pd_idx * l->entry_size) / sizeof(u32);
}

static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
			    size_t w, size_t data)
{
	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}
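/*
 * Illustrative sketch (not from the original header): an update_entry-style
 * callback would typically combine the two helpers above -- convert a PD
 * index into a word offset, then write each 32-bit word of the entry. The
 * pde_w[] contents are hypothetical; the real values are chip specific.
 *
 *	u32 pd_offset = pd_offset_from_index(l, pd_idx);
 *	u32 pde_w[2] = { 0U, 0U }; // hardware-specific PDE words
 *
 *	pd_write(g, pd, pd_offset + 0, pde_w[0]);
 *	pd_write(g, pd, pd_offset + 1, pde_w[1]);
 */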

/**
 * __nvgpu_pte_words - Compute number of words in a PTE.
 *
 * @g - The GPU.
 *
 * This computes and returns the size of a PTE for the passed chip.
 */
u32 __nvgpu_pte_words(struct gk20a *g);

/**
 * __nvgpu_get_pte - Get the contents of a PTE by virtual address
 *
 * @g     - The GPU.
 * @vm    - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte   - [out] Set to the contents of the PTE.
 *
 * Find a PTE in the passed VM based on the passed GPU virtual address. This
 * will fill @pte with a copy of the contents of the PTE. @pte must be an
 * array of u32s large enough to contain the PTE. This size can be computed
 * using __nvgpu_pte_words().
 *
 * If you wish to write to this PTE then you may modify @pte and then use
 * __nvgpu_set_pte().
 *
 * This function returns 0 if the PTE is found and -EINVAL otherwise.
 */
int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);

/**
 * __nvgpu_set_pte - Set a PTE based on virtual address
 *
 * @g     - The GPU.
 * @vm    - VM to look in.
 * @vaddr - GPU virtual address.
 * @pte   - The contents of the PTE to write.
 *
 * Find a PTE and overwrite its contents with the passed in data located in
 * @pte. If the PTE does not exist then no writing will happen; that is, this
 * function will not fill out the page tables for you. The expectation is that
 * the passed @vaddr has already been mapped and this is just modifying the
 * mapping (for instance changing invalid to valid).
 *
 * @pte must contain at least the required words for the PTE. See
 * __nvgpu_pte_words().
 *
 * This function returns 0 on success and -EINVAL otherwise.
 */
int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
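/*
 * Illustrative sketch (not from the original header): a read-modify-write of
 * a PTE using the three calls above. The bit being flipped is hypothetical;
 * the real PTE layout is chip specific.
 *
 *	u32 pte[8]; // assumed big enough here; __nvgpu_pte_words() gives the real count
 *	int err;
 *
 *	err = __nvgpu_get_pte(g, vm, vaddr, pte);
 *	if (err != 0)
 *		return err; // no PTE mapped at vaddr
 *
 *	pte[0] |= 0x1U; // e.g. set a valid bit (hypothetical position)
 *
 *	err = __nvgpu_set_pte(g, vm, vaddr, pte);
 */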

/*
 * Internal debugging routines. Probably not something you want to use.
 */
#define pte_dbg(g, attrs, fmt, args...)				\
	do {							\
		if ((attrs != NULL) && (attrs->debug))		\
			nvgpu_info(g, fmt, ##args);		\
		else						\
			nvgpu_log(g, gpu_dbg_pte, fmt, ##args);	\
	} while (0)

#endif /* NVGPU_GMMU_H */