Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/pd_cache.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/pd_cache.c  444
1 file changed, 444 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
new file mode 100644
index 00000000..4c3e06ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -0,0 +1,444 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/log2.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)

/**
 * DOC: PD cache
 *
 * To save memory with the many sub-page sized PD levels in Pascal and beyond,
 * a way of packing PD tables together is necessary. This code does just that.
 * If a PD table only requires 1024 bytes, then it is possible to have 4 of
 * these PDs in one page. This is even more pronounced for 256 byte PD tables.
 *
 * The pd cache is basically just a slab allocator. Each instance of the nvgpu
 * driver makes one of these structs:
 *
 * struct nvgpu_pd_cache {
 *	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
 *	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
 *
 *	struct nvgpu_rbtree_node *mem_tree;
 * };
 *
 * There are two sets of lists: the full and the partial. The full lists
 * contain pages of memory for which all the memory in that page is in use.
 * The partial lists contain partially full pages of memory which can be used
 * for more PD allocations. There are a couple of assumptions here:
 *
 * 1. PDs greater than or equal to the page size bypass the pd cache.
 * 2. PDs are always a power of 2 and at least %NVGPU_PD_CACHE_MIN bytes.
 *
 * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
 * lists. For a 4KB page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
 * 256, 512, 1024, and 2048 byte PDs.
 *
 * __nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check whether
 * the PD size is page size or larger and choose the correct allocation
 * scheme - either from the PD cache or directly. Similarly __nvgpu_pd_free()
 * will free a PD allocated by __nvgpu_pd_alloc().
 *
 * Since the top level PD (the PDB) must be page aligned but is less than a
 * page in size, the direct functions must be used for allocating PDBs.
 * Otherwise there would be alignment issues for the PDBs when they get packed.
 */
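
/*
 * A worked example of the packing described above, assuming the usual 4KB
 * page (the authoritative constants live in the gmmu header, so treat the
 * numbers below as illustrative only):
 *
 *   pd_size   PDs per page   "full" alloc_map mask
 *   256 B     16             0xffff
 *   512 B      8             0x00ff
 *   1024 B     4             0x000f
 *   2048 B     2             0x0003
 *
 * Anything page sized or larger never reaches the cache; it goes straight
 * through __nvgpu_pd_cache_alloc_direct().
 */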

static u32 nvgpu_pd_cache_nr(u32 bytes)
{
	return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1));
}
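
/*
 * Illustration (not authoritative): if NVGPU_PD_CACHE_MIN is 256 and
 * NVGPU_PD_CACHE_MIN_SHIFT is one more than the matching shift (i.e. 9),
 * then nvgpu_pd_cache_nr() maps 256 -> 0, 512 -> 1, 1024 -> 2 and 2048 -> 3,
 * which indexes the full/partial lists described above.
 */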

static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
{
	u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);

	return mask_offset - 1;
}
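
/*
 * For example, with a 4KB PAGE_SIZE and 512-byte PDs: PAGE_SIZE / pd_size is
 * 8, so the returned mask is (1 << 8) - 1 == 0xff, i.e. one bit per PD slot
 * that fits in the page.
 */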

int nvgpu_pd_cache_init(struct gk20a *g)
{
	struct nvgpu_pd_cache *cache;
	int i;

	/*
	 * This gets called from finalize_poweron() so we need to make sure we
	 * don't reinit the pd_cache over and over.
	 */
	if (g->mm.pd_cache)
		return 0;

	cache = nvgpu_kzalloc(g, sizeof(*cache));
	if (!cache) {
		nvgpu_err(g, "Failed to alloc pd_cache!");
		return -ENOMEM;
	}

	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
		nvgpu_init_list_node(&cache->full[i]);
		nvgpu_init_list_node(&cache->partial[i]);
	}

	cache->mem_tree = NULL;
	g->mm.pd_cache = cache;
	nvgpu_mutex_init(&cache->lock);

	pd_dbg(g, "PD cache initialized!");

	return 0;
}

void nvgpu_pd_cache_fini(struct gk20a *g)
{
	int i;
	struct nvgpu_pd_cache *cache = g->mm.pd_cache;

	if (!cache)
		return;

	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
		WARN_ON(!nvgpu_list_empty(&cache->full[i]));
		WARN_ON(!nvgpu_list_empty(&cache->partial[i]));
	}

	nvgpu_kfree(g, g->mm.pd_cache);
}

/*
 * This is the simple pass-through for page sized or larger PDs.
 *
 * Note: this does not need the cache lock since it does not modify any of the
 * PD cache data structures.
 */
int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				  struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	int err;
	unsigned long flags = 0;

	pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);

	pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
	if (!pd->mem) {
		nvgpu_err(g, "OOM allocating nvgpu_mem struct!");
		return -ENOMEM;
	}

	/*
	 * If bytes == PAGE_SIZE then it's impossible to get a discontiguous
	 * DMA allocation. Some DMA implementations may, despite this fact,
	 * still use the contiguous pool for page sized allocations. As such
	 * only request explicitly contiguous allocs if the page directory is
	 * larger than the page size. Also, of course, this is all only
	 * relevant for GPUs not using an IOMMU. If there is an IOMMU, DMA
	 * allocs are always going to be virtually contiguous and we don't
	 * have to force the underlying allocations to be physically
	 * contiguous as well.
	 */
	if (!nvgpu_iommuable(g) && bytes > PAGE_SIZE)
		flags = NVGPU_DMA_FORCE_CONTIGUOUS;

	err = nvgpu_dma_alloc_flags(g, flags, bytes, pd->mem);
	if (err) {
		nvgpu_err(g, "OOM allocating page directory!");
		nvgpu_kfree(g, pd->mem);
		return -ENOMEM;
	}

	pd->cached = false;
	pd->mem_offs = 0;

	return 0;
}
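
/*
 * To make the flags logic above concrete (hypothetical sizes, not taken from
 * the hardware headers): a multi-page PD, say 8KB, on a GPU without an IOMMU
 * would be allocated with NVGPU_DMA_FORCE_CONTIGUOUS, while a page-sized PD,
 * or any PD on an IOMMU-backed GPU, is allocated with no special flags.
 */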

/*
 * Make a new nvgpu_pd_mem_entry and allocate a PD from it. Update the passed
 * pd to reflect this allocation.
 */
static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
				    struct nvgpu_pd_cache *cache,
				    struct nvgpu_gmmu_pd *pd,
				    u32 bytes)
{
	struct nvgpu_pd_mem_entry *pentry;

	pd_dbg(g, "PD-Alloc [C] New: offs=0");

	pentry = nvgpu_kzalloc(g, sizeof(*pentry));
	if (!pentry) {
		nvgpu_err(g, "OOM allocating pentry!");
		return -ENOMEM;
	}

	if (nvgpu_dma_alloc(g, PAGE_SIZE, &pentry->mem)) {
		nvgpu_kfree(g, pentry);
		nvgpu_err(g, "Unable to DMA alloc!");
		return -ENOMEM;
	}

	pentry->pd_size = bytes;
	nvgpu_list_add(&pentry->list_entry,
		       &cache->partial[nvgpu_pd_cache_nr(bytes)]);

	/*
	 * This allocates the very first PD table in the set of tables in this
	 * nvgpu_pd_mem_entry.
	 */
	pentry->alloc_map = 1;

	/*
	 * Now update the nvgpu_gmmu_pd to reflect this allocation.
	 */
	pd->mem = &pentry->mem;
	pd->mem_offs = 0;
	pd->cached = true;

	pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
	nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);

	return 0;
}
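
/*
 * After a successful nvgpu_pd_cache_alloc_new() for, say, a 1024-byte PD
 * (illustrative values): pentry->alloc_map == 0x1, the pentry sits on
 * cache->partial[nvgpu_pd_cache_nr(1024)], and the caller's pd points at the
 * new page with mem_offs == 0.
 */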

static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
					     struct nvgpu_pd_cache *cache,
					     struct nvgpu_pd_mem_entry *pentry,
					     struct nvgpu_gmmu_pd *pd)
{
	unsigned long bit_offs;
	u32 mem_offs;
	u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);

	/*
	 * Find and allocate an open PD.
	 */
	bit_offs = ffz(pentry->alloc_map);
	mem_offs = bit_offs * pentry->pd_size;

	/* Bit map full. Something is wrong. */
	if (WARN_ON(bit_offs >= ffz(pentry_mask)))
		return -ENOMEM;

	pentry->alloc_map |= 1 << bit_offs;

	pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);

	/*
	 * First update the pd.
	 */
	pd->mem = &pentry->mem;
	pd->mem_offs = mem_offs;
	pd->cached = true;

	/*
	 * Now make sure the pentry is in the correct list (full vs. partial).
	 */
	if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
		pd_dbg(g, "Adding pentry to full list!");
		nvgpu_list_del(&pentry->list_entry);
		nvgpu_list_add(&pentry->list_entry,
			&cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
	}

	return 0;
}
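
/*
 * A sketch of the bitmap bookkeeping above, using 1024-byte PDs whose full
 * mask is 0xf (illustrative): with alloc_map == 0x5, ffz() picks bit 1, so
 * mem_offs becomes 1024 and alloc_map becomes 0x7; a later allocation takes
 * bit 3, alloc_map reaches 0xf == mask, and the pentry moves to the full
 * list.
 */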

/*
 * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there are no
 * partial nvgpu_pd_mem_entries.
 */
static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
	struct nvgpu_pd_cache *cache, u32 bytes)
{
	struct nvgpu_list_node *list =
		&cache->partial[nvgpu_pd_cache_nr(bytes)];

	if (nvgpu_list_empty(list))
		return NULL;

	return nvgpu_list_first_entry(list,
				      nvgpu_pd_mem_entry,
				      list_entry);
}

/*
 * Allocate memory from an nvgpu_mem for the page directory.
 */
static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
				struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct nvgpu_pd_mem_entry *pentry;
	int err;

	pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);

	if (bytes & (bytes - 1) ||
	    (bytes >= PAGE_SIZE ||
	     bytes < NVGPU_PD_CACHE_MIN)) {
		pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
		return -EINVAL;
	}

	pentry = nvgpu_pd_cache_get_partial(cache, bytes);
	if (!pentry)
		err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
	else
		err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);

	if (err)
		nvgpu_err(g, "PD-Alloc [C] Failed!");

	return err;
}
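
/*
 * The size check above accepts only power-of-two sizes in the range
 * [NVGPU_PD_CACHE_MIN, PAGE_SIZE). On a 4KB-page system that means 256, 512,
 * 1024 and 2048 byte PDs; 4096 bytes is handled by the direct path, and a
 * non-power-of-two size such as 768 is rejected with -EINVAL.
 */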

/*
 * Allocate the DMA memory for a page directory. This handles the necessary PD
 * cache logistics. Since on Parker and later GPUs some of the page directories
 * are smaller than a page, packing these PDs together saves a lot of memory.
 */
int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct gk20a *g = gk20a_from_vm(vm);
	int err;

	/*
	 * Simple case: the PD is at least a page in size so just do a regular
	 * DMA alloc.
	 */
	if (bytes >= PAGE_SIZE) {
		err = __nvgpu_pd_cache_alloc_direct(g, pd, bytes);
		if (err)
			return err;

		return 0;
	}

	if (WARN_ON(!g->mm.pd_cache))
		return -ENOMEM;

	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
	err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
	nvgpu_mutex_release(&g->mm.pd_cache->lock);

	return err;
}

void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
	pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);

	if (!pd->mem)
		return;

	nvgpu_dma_free(g, pd->mem);
	nvgpu_kfree(g, pd->mem);
	pd->mem = NULL;
}

static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
					  struct nvgpu_pd_cache *cache,
					  struct nvgpu_pd_mem_entry *pentry)
{
	nvgpu_dma_free(g, &pentry->mem);
	nvgpu_list_del(&pentry->list_entry);
	nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
	nvgpu_kfree(g, pentry);
}

static void nvgpu_pd_cache_do_free(struct gk20a *g,
				   struct nvgpu_pd_cache *cache,
				   struct nvgpu_pd_mem_entry *pentry,
				   struct nvgpu_gmmu_pd *pd)
{
	u32 index = pd->mem_offs / pentry->pd_size;
	u32 bit = 1 << index;

	/* Mark entry as free. */
	pentry->alloc_map &= ~bit;

	if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
		/*
		 * Partially full still. If it was already on the partial list
		 * this just re-adds it.
		 */
		nvgpu_list_del(&pentry->list_entry);
		nvgpu_list_add(&pentry->list_entry,
			&cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
	} else {
		/* Empty now so free it. */
		nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
	}
}
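
/*
 * Example of the free path above (illustrative): freeing the PD at
 * mem_offs == 2048 from a 1024-byte pentry clears bit 2 of alloc_map. If any
 * bits remain set the pentry is re-added to the partial list; once alloc_map
 * drops to zero the whole backing page is handed back to the DMA allocator
 * via nvgpu_pd_cache_free_mem_entry().
 */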

static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
	struct gk20a *g,
	struct nvgpu_pd_cache *cache,
	struct nvgpu_gmmu_pd *pd)
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
			    cache->mem_tree);
	if (!node)
		return NULL;

	return nvgpu_pd_mem_entry_from_tree_entry(node);
}

static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
				struct nvgpu_gmmu_pd *pd)
{
	struct nvgpu_pd_mem_entry *pentry;

	pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);

	pentry = nvgpu_pd_cache_look_up(g, cache, pd);
	if (!pentry) {
		WARN(1, "Attempting to free non-existent pd");
		return;
	}

	nvgpu_pd_cache_do_free(g, cache, pentry, pd);
}

void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/*
	 * Simple case: just DMA free.
	 */
	if (!pd->cached)
		return __nvgpu_pd_cache_free_direct(g, pd);

	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
	nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
	nvgpu_mutex_release(&g->mm.pd_cache->lock);
}
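
/*
 * A minimal usage sketch, not part of the driver proper (the GMMU code is the
 * real caller; the locals here are made up). Given a struct vm_gk20a *vm and
 * a zeroed struct nvgpu_gmmu_pd pd, a sub-page PD might be handled as:
 *
 *	err = __nvgpu_pd_alloc(vm, &pd, 1024);
 *	if (err)
 *		return err;
 *	// ... write PDEs into pd.mem at byte offset pd.mem_offs ...
 *	__nvgpu_pd_free(vm, &pd);
 *
 * A page-aligned PDB, by contrast, should use __nvgpu_pd_cache_alloc_direct()
 * and __nvgpu_pd_cache_free_direct() as described in the DOC comment.
 */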