summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/vgpu
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2015-03-18 16:33:09 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2015-05-11 11:53:25 -0400
commita2e852364582e9c337f52bc53ccc33877c8f3b47 (patch)
treefb13c5ad80db8eb2424a753a92389c7a3a322a12 /drivers/gpu/nvgpu/vgpu
parent0566aee853eb32f4f796499b6b00ddf0f1d7de34 (diff)
gpu: nvgpu: New allocator for VA space
Implement a new buddy allocation scheme for the GPU's VA space. The bitmap allocator was using too much memory and is not a scalable solution as the GPU's address space keeps getting bigger. The buddy allocation scheme is much more memory efficient when the majority of the address space is not allocated. The buddy allocator is not constrained by the notion of a split address space. The bitmap allocator could only manage either small pages or large pages, but not both at the same time. Thus the bottom of the address space was for small pages and the top for large pages. Although that split has not been removed quite yet, the new allocator enables that to happen. The buddy allocator is also very scalable. It can manage anything from the relatively small comptag space to the enormous GPU VA space, and everything in between. This is important since the GPU has lots of differently sized spaces that need managing. Currently there are certain limitations. For one, the allocator does not handle the fixed allocations from CUDA very well. It can do so, but with certain caveats. The PTE page size is always set to small. This means the buddy allocator may place other small page allocations in the buddies around the fixed allocation. It does this to avoid having large and small page allocations in the same PDE. Change-Id: I501cd15af03611536490137331d43761c402c7f9 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/740694 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu')
-rw-r--r--drivers/gpu/nvgpu/vgpu/ltc_vgpu.c5
-rw-r--r--drivers/gpu/nvgpu/vgpu/mm_vgpu.c36
2 files changed, 16 insertions, 25 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
index 1beac216..211e34b5 100644
--- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -41,9 +41,8 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
41 if (max_comptag_lines < 2) 41 if (max_comptag_lines < 2)
42 return -ENXIO; 42 return -ENXIO;
43 43
44 gk20a_allocator_init(&gr->comp_tags, "comptag", 44 __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
45 1, /* start */ 45 1, max_comptag_lines - 1, 1, 10, 0); /* length*/
46 max_comptag_lines - 1); /* length*/
47 return 0; 46 return 0;
48} 47}
49 48
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 94e4602f..855aac0d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -243,11 +243,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
243 struct tegra_vgpu_as_share_params *p = &msg.params.as_share; 243 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
244 struct mm_gk20a *mm = &g->mm; 244 struct mm_gk20a *mm = &g->mm;
245 struct vm_gk20a *vm; 245 struct vm_gk20a *vm;
246 u32 num_small_pages, num_large_pages, low_hole_pages;
247 u64 small_vma_size, large_vma_size; 246 u64 small_vma_size, large_vma_size;
248 char name[32]; 247 char name[32];
249 int err, i; 248 int err, i;
250 u32 start;
251 249
252 /* note: keep the page sizes sorted lowest to highest here */ 250 /* note: keep the page sizes sorted lowest to highest here */
253 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { 251 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -294,33 +292,27 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
294 small_vma_size = (u64)16 << 30; 292 small_vma_size = (u64)16 << 30;
295 large_vma_size = vm->va_limit - small_vma_size; 293 large_vma_size = vm->va_limit - small_vma_size;
296 294
297 num_small_pages = (u32)(small_vma_size >>
298 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
299
300 /* num_pages above is without regard to the low-side hole. */
301 low_hole_pages = (vm->va_start >>
302 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
303
304 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 295 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
305 gmmu_page_sizes[gmmu_page_size_small]>>10); 296 gmmu_page_sizes[gmmu_page_size_small]>>10);
306 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], 297 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
307 name, 298 vm, name,
308 low_hole_pages, /*start*/ 299 vm->va_start,
309 num_small_pages - low_hole_pages);/* length*/ 300 small_vma_size - vm->va_start,
301 SZ_4K,
302 GPU_BALLOC_MAX_ORDER,
303 GPU_BALLOC_GVA_SPACE);
310 if (err) 304 if (err)
311 goto clean_up_share; 305 goto clean_up_share;
312 306
313 start = (u32)(small_vma_size >>
314 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
315 num_large_pages = (u32)(large_vma_size >>
316 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
317
318 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 307 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
319 gmmu_page_sizes[gmmu_page_size_big]>>10); 308 gmmu_page_sizes[gmmu_page_size_big]>>10);
320 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 309 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
321 name, 310 vm, name,
322 start, /* start */ 311 small_vma_size,
323 num_large_pages); /* length */ 312 large_vma_size,
313 big_page_size,
314 GPU_BALLOC_MAX_ORDER,
315 GPU_BALLOC_GVA_SPACE);
324 if (err) 316 if (err)
325 goto clean_up_small_allocator; 317 goto clean_up_small_allocator;
326 318