Diffstat (limited to 'drivers/gpu/nvgpu')

-rw-r--r--  drivers/gpu/nvgpu/common/mm/pd_cache.c  | 94
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h  | 23

2 files changed, 78 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
index d9dc3db0..a5b3d134 100644
--- a/drivers/gpu/nvgpu/common/mm/pd_cache.c
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -42,7 +42,7 @@
  * to have 4 of these PDs in one page. This is even more pronounced for 256 byte
  * PD tables.
  *
- * The pd cache is basially just a slab allocator. Each instance of the nvgpu
+ * The pd cache is basically a slab allocator. Each instance of the nvgpu
  * driver makes one of these structs:
  *
  * struct nvgpu_pd_cache {
@@ -52,22 +52,18 @@
  *      struct nvgpu_rbtree_node *mem_tree;
  * };
  *
- * There are two sets of lists, the full and the partial. The full lists contain
- * pages of memory for which all the memory in that page is in use. The partial
- * lists contain partially full pages of memory which can be used for more PD
- * allocations. There a couple of assumptions here:
+ * There are two sets of lists used for cached allocations, the full and the
+ * partial. The full lists contain pages of memory for which all the memory in
+ * that entry is in use. The partial lists contain partially full blocks of
+ * memory which can be used for more PD allocations. The cache works as follows:
  *
- * 1. PDs greater than or equal to the page size bypass the pd cache.
+ * 1. PDs greater than NVGPU_PD_CACHE_SIZE bypass the pd cache.
  * 2. PDs are always power of 2 and greater than %NVGPU_PD_CACHE_MIN bytes.
  *
- * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
- * lists. For a 4Kb page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
- * 256, 512, 1024, and 2048 byte PDs.
- *
  * nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD
- * size is page size or larger and choose the correct allocation scheme - either
- * from the PD cache or directly. Similarly nvgpu_pd_free() will free a PD
- * allocated by nvgpu_pd_alloc().
+ * size is NVGPU_PD_CACHE_SIZE or larger and choose the correct allocation
+ * scheme - either from the PD cache or directly. Similarly nvgpu_pd_free()
+ * will free a PD allocated by nvgpu_pd_alloc().
  *
  * Since the top level PD (the PDB) is a page aligned pointer but less than a
  * page size the direct functions must be used for allocating PDBs. Otherwise
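
Note (illustrative, not part of the patch): with the new constants from gmmu.h
below (NVGPU_PD_CACHE_MIN = 256, NVGPU_PD_CACHE_COUNT = 8), a cache block is
256 * 2^8 = 64KB, and nvgpu_pd_cache_nr() maps each power-of-two PD size to one
of the 8 full/partial list pairs. A minimal userspace sketch of that math, with
the kernel's ilog2() swapped for a portable loop:

    /* Illustrative userspace sketch of the sizing math; constants copied
     * from this patch, ilog2() replaced by a portable loop.
     */
    #include <stdio.h>

    #define NVGPU_PD_CACHE_MIN        256U
    #define NVGPU_PD_CACHE_MIN_SHIFT  9U
    #define NVGPU_PD_CACHE_COUNT      8U
    #define NVGPU_PD_CACHE_SIZE       (NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))

    static unsigned int ilog2_u32(unsigned int v)
    {
            unsigned int n = 0U;

            while (v >>= 1)
                    n++;
            return n;
    }

    int main(void)
    {
            unsigned int sz;

            /* 256B..32KB PDs are cached; 64KB and larger take the direct path. */
            for (sz = NVGPU_PD_CACHE_MIN; sz < NVGPU_PD_CACHE_SIZE; sz <<= 1U)
                    printf("pd_size=%5u -> list %u, %u PDs per mem entry\n", sz,
                           ilog2_u32(sz >> (NVGPU_PD_CACHE_MIN_SHIFT - 1U)),
                           NVGPU_PD_CACHE_SIZE / sz);
            return 0;
    }

This prints list indices 0 through 7 for PD sizes 256 through 32768 bytes, which
is why NVGPU_PD_CACHE_COUNT grows to 8 in this patch.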
@@ -79,11 +75,11 @@ static u32 nvgpu_pd_cache_nr(u32 bytes)
         return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1U));
 }
 
-static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
+static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry)
 {
-        u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);
+        BUG_ON(pentry->pd_size == 0);
 
-        return mask_offset - 1U;
+        return NVGPU_PD_CACHE_SIZE / pentry->pd_size;
 }
 
 int nvgpu_pd_cache_init(struct gk20a *g)
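
Note (illustrative, not part of the patch): the helper changes meaning here, not
just name. The old version returned a bitmask of the form
(1 << (PAGE_SIZE / pd_size)) - 1, which cannot represent the 256 slots that a
64KB block of 256-byte PDs now holds. The new version returns a plain slot
count, paired with the multi-word alloc_map bitmap in gmmu.h. A sketch of the
new arithmetic, assuming the constants from this patch:

    /* Sketch of the new entry-count math (NVGPU_PD_CACHE_SIZE assumed 64KB). */
    static unsigned int nr_entries(unsigned int pd_size)
    {
            return 65536U / pd_size;   /* NVGPU_PD_CACHE_SIZE / pd_size */
    }
    /* 256 -> 256 slots, 512 -> 128, 1024 -> 64, ..., 32768 -> 2. */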
@@ -201,6 +197,8 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
                 u32 bytes)
 {
         struct nvgpu_pd_mem_entry *pentry;
+        unsigned long flags = 0;
+        int err;
 
         pd_dbg(g, "PD-Alloc [C] New: offs=0");
 
@@ -210,8 +208,21 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
                 return -ENOMEM;
         }
 
-        if (nvgpu_dma_alloc(g, PAGE_SIZE, &pentry->mem)) {
+        if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > PAGE_SIZE)) {
+                flags = NVGPU_DMA_FORCE_CONTIGUOUS;
+        }
+
+        err = nvgpu_dma_alloc_flags(g, flags,
+                                    NVGPU_PD_CACHE_SIZE, &pentry->mem);
+        if (err != 0) {
                 nvgpu_kfree(g, pentry);
+
+                /* Not enough contiguous space, but a direct
+                 * allocation may work
+                 */
+                if (err == -ENOMEM) {
+                        return nvgpu_pd_cache_alloc_direct(g, pd, bytes);
+                }
                 nvgpu_err(g, "Unable to DMA alloc!");
                 return -ENOMEM;
         }
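
Note (illustrative, not part of the patch): two behavioural changes land in
this hunk. Without an IOMMU the 64KB block must be physically contiguous
(NVGPU_DMA_FORCE_CONTIGUOUS), and an -ENOMEM from that larger allocation no
longer fails the PD alloc outright: it retries through
nvgpu_pd_cache_alloc_direct() with just the requested size. A hypothetical
userspace analogue of the fallback shape, with invented names:

    /* Hypothetical analogue of the fallback: prefer carving PDs out of one
     * large block, but fall back to an exact-size allocation when the large
     * block cannot be found. malloc() stands in for the DMA allocator.
     */
    #include <stdlib.h>

    void *alloc_pd_backing(size_t pd_bytes, size_t cache_bytes)
    {
            void *blk = malloc(cache_bytes);  /* the big slab block         */

            if (blk == NULL)                  /* no contiguous space...     */
                    blk = malloc(pd_bytes);   /* ...a small alloc may still fit */
            return blk;
    }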
@@ -224,7 +235,8 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
          * This allocates the very first PD table in the set of tables in this
          * nvgpu_pd_mem_entry.
          */
-        pentry->alloc_map = 1;
+        set_bit(0U, pentry->alloc_map);
+        pentry->allocs = 1;
 
         /*
          * Now update the nvgpu_gmmu_pd to reflect this allocation.
@@ -246,20 +258,21 @@ static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
 {
         unsigned long bit_offs;
         u32 mem_offs;
-        u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);
+        u32 nr_bits = nvgpu_pd_cache_get_nr_entries(pentry);
 
         /*
          * Find and allocate an open PD.
          */
-        bit_offs = ffz(pentry->alloc_map);
+        bit_offs = find_first_zero_bit(pentry->alloc_map, nr_bits);
         mem_offs = bit_offs * pentry->pd_size;
 
         /* Bit map full. Somethings wrong. */
-        if (WARN_ON(bit_offs >= ffz(pentry_mask))) {
+        if (WARN_ON(bit_offs >= nr_bits)) {
                 return -ENOMEM;
         }
 
-        pentry->alloc_map |= 1 << bit_offs;
+        set_bit(bit_offs, pentry->alloc_map);
+        pentry->allocs++;
 
         pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);
 
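
Note (illustrative, not part of the patch): ffz() scans only a single unsigned
long, while find_first_zero_bit()/set_bit() walk the multi-word bitmap, and the
new allocs counter makes the full/partial decision below a simple compare
instead of a mask test. A single-threaded userspace equivalent of the slot
allocation (the kernel helpers are atomic where this sketch is not):

    /* Userspace equivalent of find_first_zero_bit() + set_bit() over a
     * multi-word bitmap; returns nbits when the map is full, mirroring the
     * WARN_ON(bit_offs >= nr_bits) check above.
     */
    #include <limits.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    unsigned long bitmap_alloc_slot(unsigned long *map, unsigned long nbits)
    {
            unsigned long bit;

            for (bit = 0; bit < nbits; bit++) {
                    unsigned long mask = 1UL << (bit % BITS_PER_LONG);

                    if ((map[bit / BITS_PER_LONG] & mask) == 0UL) {
                            map[bit / BITS_PER_LONG] |= mask; /* set_bit() */
                            return bit;   /* first free slot */
                    }
            }
            return nbits;   /* map full */
    }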
@@ -273,7 +286,7 @@ static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
         /*
          * Now make sure the pentry is in the correct list (full vs partial).
          */
-        if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
+        if (pentry->allocs >= nr_bits) {
                 pd_dbg(g, "Adding pentry to full list!");
                 nvgpu_list_del(&pentry->list_entry);
                 nvgpu_list_add(&pentry->list_entry,
@@ -314,7 +327,7 @@ static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
         pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);
 
         if ((bytes & (bytes - 1U)) != 0U ||
-            (bytes >= PAGE_SIZE ||
+            (bytes >= NVGPU_PD_CACHE_SIZE ||
              bytes < NVGPU_PD_CACHE_MIN)) {
                 pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
                 return -EINVAL;
@@ -339,16 +352,18 @@ static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
  * cache logistics. Since on Parker and later GPUs some of the page directories
  * are smaller than a page packing these PDs together saves a lot of memory.
  */
-int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
+int nvgpu_pd_alloc(struct vm_gk20a *vm,
+                   struct nvgpu_gmmu_pd *pd,
+                   u32 bytes)
 {
         struct gk20a *g = gk20a_from_vm(vm);
         int err;
 
         /*
-         * Simple case: PD is bigger than a page so just do a regular DMA
-         * alloc.
+         * Simple case: PD is bigger than or equal to NVGPU_PD_CACHE_SIZE so
+         * just do a regular DMA alloc.
          */
-        if (bytes >= PAGE_SIZE) {
+        if (bytes >= NVGPU_PD_CACHE_SIZE) {
                 err = nvgpu_pd_cache_alloc_direct(g, pd, bytes);
                 if (err) {
                         return err;
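
Note (illustrative, not part of the patch): with the threshold moved from
PAGE_SIZE to NVGPU_PD_CACHE_SIZE, PDs up to 32KB are now served from the cache;
only 64KB-and-larger PDs, plus the page-aligned PDB, take the direct path. A
hypothetical caller fragment, simplified beyond what the patch shows (vm comes
from the surrounding driver context):

    struct nvgpu_gmmu_pd pd = { 0 };
    int err;

    err = nvgpu_pd_alloc(vm, &pd, 4096U);  /* 4KB < 64KB: comes from the cache */
    if (err != 0)
            return err;
    /* ... fill in PDEs, program the GMMU ... */
    nvgpu_pd_free(vm, &pd);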
@@ -396,17 +411,28 @@ static void nvgpu_pd_cache_do_free(struct gk20a *g,
                 struct nvgpu_pd_mem_entry *pentry,
                 struct nvgpu_gmmu_pd *pd)
 {
-        u32 index = pd->mem_offs / pentry->pd_size;
-        u32 bit = 1 << index;
+        u32 bit = pd->mem_offs / pentry->pd_size;
 
         /* Mark entry as free. */
-        pentry->alloc_map &= ~bit;
+        clear_bit(bit, pentry->alloc_map);
+        pentry->allocs--;
 
-        if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
+        if (pentry->allocs > 0U) {
                 /*
                  * Partially full still. If it was already on the partial list
                  * this just re-adds it.
+                 *
+                 * Since the memory used for the entries is still mapped, if
+                 * igpu make sure the entries are invalidated so that the hw
+                 * doesn't accidentally try to prefetch non-existent fb memory.
+                 *
+                 * TBD: what about dgpu? (Not supported in Drive 5.0)
                  */
+                if (pd->mem->cpu_va != NULL) {
+                        memset((void *)((u64)pd->mem->cpu_va + pd->mem_offs), 0,
+                               pentry->pd_size);
+                }
+
                 nvgpu_list_del(&pentry->list_entry);
                 nvgpu_list_add(&pentry->list_entry,
                         &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
@@ -414,6 +440,8 @@ static void nvgpu_pd_cache_do_free(struct gk20a *g,
                 /* Empty now so free it. */
                 nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
         }
+
+        pd->mem = NULL;
 }
 
 static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
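
Note (illustrative, not part of the patch): the free path mirrors the alloc
path. clear_bit() plus the allocs counter replace the mask arithmetic, the
freed slot is scrubbed so the still-mapped memory cannot be prefetched as stale
page directory entries, and pd->mem is now NULLed to catch use-after-free. A
compact userspace sketch of the same bookkeeping, with invented names:

    /* Userspace sketch of the free path: clear the slot bit, scrub the
     * slot's bytes, and report whether the block is now empty so the caller
     * can release it (as nvgpu_pd_cache_free_mem_entry() does above).
     */
    #include <limits.h>
    #include <string.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    int bitmap_free_slot(unsigned long *map, unsigned long bit,
                         void *base, size_t slot_size, unsigned int *allocs)
    {
            map[bit / BITS_PER_LONG] &= ~(1UL << (bit % BITS_PER_LONG));
            (*allocs)--;

            /* Scrub the slot so stale entries cannot be read back. */
            memset((char *)base + bit * slot_size, 0, slot_size);

            return *allocs == 0U;   /* empty: free the whole block */
    }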
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 8e1eeedc..ee2207b8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -28,6 +28,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/lock.h>
+#include <nvgpu/bitops.h>
 
 /*
  * This is the GMMU API visible to blocks outside of the GMMU. Basically this
@@ -56,18 +57,25 @@ enum gk20a_mem_rw_flag {
  */
 #define NVGPU_PD_CACHE_MIN              256U
 #define NVGPU_PD_CACHE_MIN_SHIFT        9U
-#define NVGPU_PD_CACHE_COUNT            4U
+#define NVGPU_PD_CACHE_COUNT            8U
+#define NVGPU_PD_CACHE_SIZE             (NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
 
 struct nvgpu_pd_mem_entry {
         struct nvgpu_mem mem;
 
         /*
-         * Size of the page directories (not the mem). bmap is a bitmap showing
-         * which PDs have been allocated. The size of mem will always be one
-         * page. pd_size will always be a power of 2.
+         * Size of the page directories (not the mem). alloc_map is a bitmap
+         * showing which PDs have been allocated.
+         *
+         * The size of mem will be NVGPU_PD_CACHE_SIZE
+         * and pd_size will always be a power of 2.
+         *
          */
         u32 pd_size;
-        unsigned long alloc_map;
+        DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);
+
+        /* Total number of allocations in this PD. */
+        u32 allocs;
 
         struct nvgpu_list_node list_entry;
         struct nvgpu_rbtree_node tree_entry;
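
Note (illustrative, not part of the patch): worked sizing for the new fields.
NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN = 65536 / 256 = 256 bits, so alloc_map
becomes four 64-bit words where the old single unsigned long could track at
most 64 slots. A compile-time check of that arithmetic, as a C11 userspace
sketch with the constants copied from this header:

    /* Compile-time sanity check (illustrative, C11): the resized bitmap must
     * cover every minimum-size slot in one cache block.
     */
    #include <assert.h>

    #define NVGPU_PD_CACHE_MIN   256U
    #define NVGPU_PD_CACHE_COUNT 8U
    #define NVGPU_PD_CACHE_SIZE  (NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))

    static_assert(NVGPU_PD_CACHE_SIZE == 65536U,
                  "cache blocks grow from one 4KB page to 64KB");
    static_assert(NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN == 256U,
                  "alloc_map needs 256 bits, i.e. four 64-bit words");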
@@ -251,7 +259,10 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
                 struct nvgpu_mem *mem,
                 u64 gpu_va);
 
-int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
+int nvgpu_pd_alloc(struct vm_gk20a *vm,
+                   struct nvgpu_gmmu_pd *pd,
+                   u32 bytes);
+
 void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
 int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
                 struct nvgpu_gmmu_pd *pd, u32 bytes);