Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/pd_cache.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/pd_cache.c | 444
1 file changed, 444 insertions, 0 deletions

diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
new file mode 100644
index 00000000..4c3e06ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -0,0 +1,444 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/log2.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)

/**
 * DOC: PD cache
 *
 * Pascal and later GPUs have many sub-page-sized PD levels, so to save
 * memory a way of packing PD tables together is necessary. This code does
 * just that. If a PD table only requires 1024 bytes, then it is possible
 * to fit 4 of these PDs in one page. This is even more pronounced for
 * 256 byte PD tables.
 *
 * The pd cache is basically just a slab allocator. Each instance of the
 * nvgpu driver makes one of these structs:
 *
 *   struct nvgpu_pd_cache {
 *	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
 *	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
 *
 *	struct nvgpu_rbtree_node *mem_tree;
 *   };
 *
 * There are two sets of lists, the full and the partial. The full lists
 * contain pages of memory for which all the memory in that page is in use.
 * The partial lists contain partially full pages of memory which can be
 * used for more PD allocations. There are a couple of assumptions here:
 *
 * 1. PDs greater than or equal to the page size bypass the pd cache.
 * 2. PDs are always a power of 2 in size and at least %NVGPU_PD_CACHE_MIN
 *    bytes.
 *
 * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
 * lists. For a 4KB page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
 * 256, 512, 1024, and 2048 byte PDs.
 *
 * __nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the
 * PD size is page size or larger and choose the correct allocation scheme -
 * either from the PD cache or directly. Similarly __nvgpu_pd_free() will
 * free a PD allocated by __nvgpu_pd_alloc().
 *
 * Since the top level PD (the PDB) is a page aligned pointer but less than
 * a page in size, the direct functions must be used for allocating PDBs.
 * Otherwise there would be alignment issues for the PDBs when they get
 * packed.
 */

static u32 nvgpu_pd_cache_nr(u32 bytes)
{
	return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1));
}
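
/*
 * A worked example for nvgpu_pd_cache_nr(), assuming NVGPU_PD_CACHE_MIN_SHIFT
 * is 9 (which lines up with the 256-2048 byte range described above):
 *
 *   nvgpu_pd_cache_nr(256)  == ilog2(256 >> 8)  == ilog2(1) == 0
 *   nvgpu_pd_cache_nr(512)  == ilog2(512 >> 8)  == ilog2(2) == 1
 *   nvgpu_pd_cache_nr(1024) == ilog2(1024 >> 8) == ilog2(4) == 2
 *   nvgpu_pd_cache_nr(2048) == ilog2(2048 >> 8) == ilog2(8) == 3
 *
 * That covers exactly the NVGPU_PD_CACHE_COUNT == 4 list buckets described
 * in the DOC comment for a 4KB page.
 */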

static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
{
	u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);

	return mask_offset - 1;
}
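
/*
 * A worked example for nvgpu_pd_cache_get_mask(), assuming a 4KB PAGE_SIZE:
 * a pentry with pd_size == 256 holds 4096 / 256 == 16 PDs per page, so the
 * mask is (1 << 16) - 1 == 0xffff - one bit per PD slot. For pd_size == 2048
 * the page holds only 2 PDs and the mask is 0x3.
 */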

int nvgpu_pd_cache_init(struct gk20a *g)
{
	struct nvgpu_pd_cache *cache;
	int i;

	/*
	 * This gets called from finalize_poweron() so we need to make sure we
	 * don't reinit the pd_cache over and over.
	 */
	if (g->mm.pd_cache)
		return 0;

	cache = nvgpu_kzalloc(g, sizeof(*cache));
	if (!cache) {
		nvgpu_err(g, "Failed to alloc pd_cache!");
		return -ENOMEM;
	}

	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
		nvgpu_init_list_node(&cache->full[i]);
		nvgpu_init_list_node(&cache->partial[i]);
	}

	cache->mem_tree = NULL;
	g->mm.pd_cache = cache;
	nvgpu_mutex_init(&cache->lock);

	pd_dbg(g, "PD cache initialized!");

	return 0;
}

void nvgpu_pd_cache_fini(struct gk20a *g)
{
	int i;
	struct nvgpu_pd_cache *cache = g->mm.pd_cache;

	if (!cache)
		return;

	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
		WARN_ON(!nvgpu_list_empty(&cache->full[i]));
		WARN_ON(!nvgpu_list_empty(&cache->partial[i]));
	}

	nvgpu_kfree(g, g->mm.pd_cache);
}

/*
 * This is the simple pass-through for PDs that are page sized or larger.
 *
 * Note: this does not need the cache lock since it does not modify any of
 * the PD cache data structures.
 */
int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				  struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	int err;
	unsigned long flags = 0;

	pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);

	pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
	if (!pd->mem) {
		nvgpu_err(g, "OOM allocating nvgpu_mem struct!");
		return -ENOMEM;
	}

	/*
	 * If bytes == PAGE_SIZE then it's impossible to get a discontiguous
	 * DMA allocation. Some DMA implementations may, despite this fact,
	 * still use the contiguous pool for page sized allocations. As such
	 * only request explicitly contiguous allocs if the page directory is
	 * larger than the page size. Also, of course, this is all only
	 * relevant for GPUs not using an IOMMU. If there is an IOMMU, DMA
	 * allocs are always going to be virtually contiguous and we don't
	 * have to force the underlying allocations to be physically
	 * contiguous as well.
	 */
	if (!nvgpu_iommuable(g) && bytes > PAGE_SIZE)
		flags = NVGPU_DMA_FORCE_CONTIGUOUS;

	err = nvgpu_dma_alloc_flags(g, flags, bytes, pd->mem);
	if (err) {
		nvgpu_err(g, "OOM allocating page directory!");
		nvgpu_kfree(g, pd->mem);
		return -ENOMEM;
	}

	pd->cached = false;
	pd->mem_offs = 0;

	return 0;
}

/*
 * Make a new nvgpu_pd_mem_entry and allocate a PD from it. Update the
 * passed pd to reflect this allocation.
 */
static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
				    struct nvgpu_pd_cache *cache,
				    struct nvgpu_gmmu_pd *pd,
				    u32 bytes)
{
	struct nvgpu_pd_mem_entry *pentry;

	pd_dbg(g, "PD-Alloc [C] New: offs=0");

	pentry = nvgpu_kzalloc(g, sizeof(*pentry));
	if (!pentry) {
		nvgpu_err(g, "OOM allocating pentry!");
		return -ENOMEM;
	}

	if (nvgpu_dma_alloc(g, PAGE_SIZE, &pentry->mem)) {
		nvgpu_kfree(g, pentry);
		nvgpu_err(g, "Unable to DMA alloc!");
		return -ENOMEM;
	}

	pentry->pd_size = bytes;
	nvgpu_list_add(&pentry->list_entry,
		       &cache->partial[nvgpu_pd_cache_nr(bytes)]);

	/*
	 * This allocates the very first PD table in the set of tables in this
	 * nvgpu_pd_mem_entry.
	 */
	pentry->alloc_map = 1;

	/*
	 * Now update the nvgpu_gmmu_pd to reflect this allocation.
	 */
	pd->mem = &pentry->mem;
	pd->mem_offs = 0;
	pd->cached = true;

	pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
	nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);

	return 0;
}
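
/*
 * Note on the rbtree key used above: the key is the address of the pentry's
 * nvgpu_mem (&pentry->mem), and a cached nvgpu_gmmu_pd points its pd->mem at
 * that same nvgpu_mem. This is what lets nvgpu_pd_cache_look_up() further
 * down recover the owning nvgpu_pd_mem_entry from nothing but pd->mem when
 * the PD is freed.
 */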

static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
					     struct nvgpu_pd_cache *cache,
					     struct nvgpu_pd_mem_entry *pentry,
					     struct nvgpu_gmmu_pd *pd)
{
	unsigned long bit_offs;
	u32 mem_offs;
	u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);

	/*
	 * Find and allocate an open PD.
	 */
	bit_offs = ffz(pentry->alloc_map);
	mem_offs = bit_offs * pentry->pd_size;

	/* Bit map full. Something's wrong. */
	if (WARN_ON(bit_offs >= ffz(pentry_mask)))
		return -ENOMEM;

	pentry->alloc_map |= 1 << bit_offs;

	pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);

	/*
	 * First update the pd.
	 */
	pd->mem = &pentry->mem;
	pd->mem_offs = mem_offs;
	pd->cached = true;

	/*
	 * Now make sure the pentry is in the correct list (full vs partial).
	 */
	if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
		pd_dbg(g, "Adding pentry to full list!");
		nvgpu_list_del(&pentry->list_entry);
		nvgpu_list_add(&pentry->list_entry,
			       &cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
	}

	return 0;
}
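
/*
 * A worked example of the bitmap logic above, assuming a 4KB PAGE_SIZE and
 * pd_size == 1024 (4 slots, pentry_mask == 0xf): with alloc_map == 0x7 the
 * first zero bit is bit 3, so ffz() returns 3, mem_offs becomes
 * 3 * 1024 == 3072, and alloc_map becomes 0xf. Since 0xf & 0xf == 0xf the
 * pentry is now full and is moved from the partial list to the full list.
 */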

/*
 * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there are no
 * partial nvgpu_pd_mem_entries.
 */
static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
	struct nvgpu_pd_cache *cache, u32 bytes)
{
	struct nvgpu_list_node *list =
		&cache->partial[nvgpu_pd_cache_nr(bytes)];

	if (nvgpu_list_empty(list))
		return NULL;

	return nvgpu_list_first_entry(list,
				      nvgpu_pd_mem_entry,
				      list_entry);
}

/*
 * Allocate memory from an nvgpu_mem for the page directory.
 */
static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
				struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct nvgpu_pd_mem_entry *pentry;
	int err;

	pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);

	if ((bytes & (bytes - 1)) ||
	    bytes >= PAGE_SIZE ||
	    bytes < NVGPU_PD_CACHE_MIN) {
		pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
		return -EINVAL;
	}

	pentry = nvgpu_pd_cache_get_partial(cache, bytes);
	if (!pentry)
		err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
	else
		err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);

	if (err)
		nvgpu_err(g, "PD-Alloc [C] Failed!");

	return err;
}

/*
 * Allocate the DMA memory for a page directory. This handles the necessary
 * PD cache logistics. On Parker and later GPUs some of the page directories
 * are smaller than a page, so packing these PDs together saves a lot of
 * memory.
 */
int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct gk20a *g = gk20a_from_vm(vm);
	int err;

	/*
	 * Simple case: PD is page sized or larger so just do a regular DMA
	 * alloc.
	 */
	if (bytes >= PAGE_SIZE) {
		err = __nvgpu_pd_cache_alloc_direct(g, pd, bytes);
		if (err)
			return err;

		return 0;
	}

	if (WARN_ON(!g->mm.pd_cache))
		return -ENOMEM;

	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
	err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
	nvgpu_mutex_release(&g->mm.pd_cache->lock);

	return err;
}
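
/*
 * A sketch of the intended use from a hypothetical caller (the names vm and
 * pd are assumed to come from the GMMU code that owns the page directory):
 *
 *   struct nvgpu_gmmu_pd pd = { 0 };
 *   int err;
 *
 *   // A 2048 byte PD: smaller than a page, so this takes the cached path.
 *   err = __nvgpu_pd_alloc(vm, &pd, 2048);
 *   if (err)
 *           return err;
 *
 *   // ... program the PD entries via pd.mem at byte offset pd.mem_offs ...
 *
 *   __nvgpu_pd_free(vm, &pd);
 */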

void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
	pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);

	if (!pd->mem)
		return;

	nvgpu_dma_free(g, pd->mem);
	nvgpu_kfree(g, pd->mem);
	pd->mem = NULL;
}

static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
					  struct nvgpu_pd_cache *cache,
					  struct nvgpu_pd_mem_entry *pentry)
{
	nvgpu_dma_free(g, &pentry->mem);
	nvgpu_list_del(&pentry->list_entry);
	nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
	nvgpu_kfree(g, pentry);
}

static void nvgpu_pd_cache_do_free(struct gk20a *g,
				   struct nvgpu_pd_cache *cache,
				   struct nvgpu_pd_mem_entry *pentry,
				   struct nvgpu_gmmu_pd *pd)
{
	u32 index = pd->mem_offs / pentry->pd_size;
	u32 bit = 1 << index;

	/* Mark entry as free. */
	pentry->alloc_map &= ~bit;

	if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
		/*
		 * Partially full still. If it was already on the partial list
		 * this just re-adds it.
		 */
		nvgpu_list_del(&pentry->list_entry);
		nvgpu_list_add(&pentry->list_entry,
			       &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
	} else {
		/* Empty now so free it. */
		nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
	}
}
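
/*
 * A worked example for the free path above, assuming a 4KB PAGE_SIZE and
 * pd_size == 256: freeing the PD at pd->mem_offs == 768 clears bit
 * 768 / 256 == 3 of alloc_map. If any other bit in the 16 bit mask is still
 * set the pentry stays (or is re-added) on the partial list; once alloc_map
 * reaches zero the whole page is handed back via
 * nvgpu_pd_cache_free_mem_entry().
 */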

static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
	struct gk20a *g,
	struct nvgpu_pd_cache *cache,
	struct nvgpu_gmmu_pd *pd)
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
			    cache->mem_tree);
	if (!node)
		return NULL;

	return nvgpu_pd_mem_entry_from_tree_entry(node);
}

static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
				struct nvgpu_gmmu_pd *pd)
{
	struct nvgpu_pd_mem_entry *pentry;

	pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);

	pentry = nvgpu_pd_cache_look_up(g, cache, pd);
	if (!pentry) {
		WARN(1, "Attempting to free non-existent pd");
		return;
	}

	nvgpu_pd_cache_do_free(g, cache, pentry, pd);
}

void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/*
	 * Simple case: just DMA free.
	 */
	if (!pd->cached) {
		__nvgpu_pd_cache_free_direct(g, pd);
		return;
	}

	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
	nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
	nvgpu_mutex_release(&g->mm.pd_cache->lock);
}