author     Terje Bergstrom <tbergstrom@nvidia.com>   2015-05-11 22:28:11 -0400
committer  Hiroshi Doyu <hdoyu@nvidia.com>           2015-05-12 05:46:39 -0400
commit     aa25a952ea2b19a081fa746f043228c270f43f94
tree       e640f945d82c598645b0b2c5cbc6eb194444026d  /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     a2e852364582e9c337f52bc53ccc33877c8f3b47
Revert "gpu: nvgpu: New allocator for VA space"
This reverts commit 2e235ac150fa4af8632c9abf0f109a10973a0bf5.

Change-Id: I3aa745152124c2bc09c6c6dc5aeb1084ae7e08a4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/741469
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Hiroshi Doyu <hdoyu@nvidia.com>
Tested-by: Hiroshi Doyu <hdoyu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  |  194
1 file changed, 140 insertions(+), 54 deletions(-)
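Before the diff itself, a quick note on what the revert changes at the call sites: the buddy-style interface introduced by the reverted commit (gk20a_balloc()/gk20a_bfree(), which returns and takes byte or ctag-line offsets directly, with 0 meaning failure) is swapped back for the older page-accounting allocator, which is driven through its alloc()/free() function pointers, works in page (or ctag-line) numbers, and reports failure through an error code. The sketch below shows the caller-side round trip as it reappears in gk20a_vm_alloc_va(); it is a simplified illustration assuming only what the call sites in this diff show, and the helper name is hypothetical.

/*
 * Illustrative sketch only: the allocator shape is inferred from the call
 * sites in this diff, not copied from the driver's headers, and the helper
 * name sketch_alloc_va() is made up.
 */
static u64 sketch_alloc_va(struct gk20a_allocator *vma, u64 size, u32 page_size)
{
        u32 start_page_nr = 0;
        u32 num_pages;

        /* round the request up to whole GMMU pages, as gk20a_vm_alloc_va() does */
        size = (size + page_size - 1) & ~((u64)page_size - 1);
        num_pages = size >> ilog2(page_size);

        /* page-accounting allocator: error return, page number via out-param */
        if (vma->alloc(vma, &start_page_nr, num_pages, 1))
                return 0;       /* out of space */

        /* convert the start page number back into a GPU virtual address */
        return (u64)start_page_nr << ilog2(page_size);
}

Freeing mirrors this in the restored gk20a_vm_free_va(): the VA offset is shifted back down to a start page number and handed to vma->free(vma, start_page_nr, num_pages, 1).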
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c11414b5..5d1ff563 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,8 +132,10 @@ static void gk20a_mm_delete_priv(void *_priv)
 
         if (priv->comptags.lines) {
                 BUG_ON(!priv->comptag_allocator);
-                gk20a_bfree(priv->comptag_allocator,
-                            priv->comptags.real_offset);
+                priv->comptag_allocator->free(priv->comptag_allocator,
+                                              priv->comptags.offset,
+                                              priv->comptags.allocated_lines,
+                                              1);
         }
 
         /* Free buffer states */
@@ -222,9 +224,10 @@ static int gk20a_alloc_comptags(struct gk20a *g,
                               u32 lines, bool user_mappable)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+        u32 offset = 0;
+        int err;
         u32 ctaglines_to_allocate;
-        u32 ctagline_align = 1;
-        u32 offset;
+        u32 ctagline_align;
         const u32 aggregate_cacheline_sz =
                 g->gr.cacheline_size * g->gr.slices_per_ltc *
                 g->ltc_count;
@@ -238,6 +241,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
         if (!user_mappable) {
                 ctaglines_to_allocate = lines;
+                ctagline_align = 1;
         } else {
                 /* Unfortunately, we cannot use allocation alignment
                  * here, since compbits per cacheline is not always a
@@ -269,25 +273,71 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
                 if (ctaglines_to_allocate < lines)
                         return -EINVAL; /* integer overflow */
-                pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
         }
 
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        offset = gk20a_balloc(allocator, ctaglines_to_allocate);
-        if (!offset)
-                return -ENOMEM;
+        err = allocator->alloc(allocator, &offset,
+                               ctaglines_to_allocate, 1);
+        if (!err) {
+                const u32 alignment_lines =
+                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+                        offset;
+
+                /* prune the preceding ctaglines that were allocated
+                   for alignment */
+                if (alignment_lines) {
+                        /* free alignment lines */
+                        int tmp =
+                                allocator->free(allocator, offset,
+                                                alignment_lines,
+                                                1);
+                        WARN_ON(tmp);
+
+                        offset += alignment_lines;
+                        ctaglines_to_allocate -= alignment_lines;
+                }
 
-        priv->comptags.lines = lines;
-        priv->comptags.real_offset = offset;
+                /* check if we can prune the trailing, too */
+                if (user_mappable)
+                {
+                        u32 needed_cachelines =
+                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+
+                        u32 first_unneeded_cacheline =
+                                DIV_ROUND_UP(round_up(needed_cachelines *
+                                                      aggregate_cacheline_sz,
+                                                      small_pgsz),
+                                             aggregate_cacheline_sz);
+                        u32 needed_ctaglines =
+                                first_unneeded_cacheline *
+                                g->gr.comptags_per_cacheline;
+
+                        if (needed_ctaglines < ctaglines_to_allocate) {
+                                /* free alignment lines */
+                                int tmp =
+                                        allocator->free(
+                                                allocator,
+                                                offset + needed_ctaglines,
+                                                (ctaglines_to_allocate -
+                                                 needed_ctaglines),
+                                                1);
+                                WARN_ON(tmp);
+
+                                ctaglines_to_allocate = needed_ctaglines;
+                        }
+                }
+
+                priv->comptags.offset = offset;
+                priv->comptags.lines = lines;
+                priv->comptags.allocated_lines = ctaglines_to_allocate;
+                priv->comptags.user_mappable = user_mappable;
+        }
+        return err;
+}
 
-        if (user_mappable)
-                offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
 
-        priv->comptags.offset = offset;
 
-        return 0;
-}
 
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
@@ -839,12 +889,14 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                       u64 size,
                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+        int err;
         u64 offset;
+        u32 start_page_nr = 0, num_pages;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
         if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -860,19 +912,28 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
         }
 
-        /* Be certain we round up to gmmu_page_size if needed */
+        /* be certain we round up to gmmu_page_size if needed */
+        /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
+
         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
                         vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-        offset = gk20a_balloc(vma, size);
-        if (!offset) {
+        /* The vma allocator represents page accounting. */
+        num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
+
+        err = vma->alloc(vma, &start_page_nr, num_pages, 1);
+
+        if (err) {
                 gk20a_err(dev_from_vm(vm),
                           "%s oom: sz=0x%llx", vma->name, size);
                 return 0;
         }
 
+        offset = (u64)start_page_nr <<
+                 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
+
         return offset;
 }
 
@@ -881,12 +942,25 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                      enum gmmu_pgsz_gk20a pgsz_idx)
 {
         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
+        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+        u32 page_shift = ilog2(page_size);
+        u32 start_page_nr, num_pages;
+        int err;
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                         vma->name, offset, size);
-        gk20a_bfree(vma, offset);
 
-        return 0;
+        start_page_nr = (u32)(offset >> page_shift);
+        num_pages = (u32)((size + page_size - 1) >> page_shift);
+
+        err = vma->free(vma, start_page_nr, num_pages, 1);
+        if (err) {
+                gk20a_err(dev_from_vm(vm),
+                          "not found: offset=0x%llx, sz=0x%llx",
+                          offset, size);
+        }
+
+        return err;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
@@ -1062,7 +1136,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
         if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-                          map_offset);
+                        map_offset);
                 return -EINVAL;
         }
 
@@ -2359,6 +2433,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                   char *name)
 {
         int err, i;
+        u32 num_small_pages, num_large_pages, low_hole_pages;
         char alloc_name[32];
         u64 small_vma_size, large_vma_size;
         u32 pde_lo, pde_hi;
@@ -2419,31 +2494,34 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 large_vma_size = vm->va_limit - small_vma_size;
         }
 
+        num_small_pages = (u32)(small_vma_size >>
+                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+        /* num_pages above is without regard to the low-side hole. */
+        low_hole_pages = (vm->va_start >>
+                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
         snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
                  vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-        err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-                                     vm, alloc_name,
-                                     vm->va_start,
-                                     small_vma_size - vm->va_start,
-                                     SZ_4K,
-                                     GPU_BALLOC_MAX_ORDER,
-                                     GPU_BALLOC_GVA_SPACE);
+        err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+                         alloc_name,
+                         low_hole_pages,             /*start*/
+                         num_small_pages - low_hole_pages);/* length*/
         if (err)
                 goto clean_up_ptes;
 
         if (big_pages) {
+                u32 start = (u32)(small_vma_size >>
+                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+                num_large_pages = (u32)(large_vma_size >>
+                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
                 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
                          name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-                /*
-                 * Big page VMA starts at the end of the small page VMA.
-                 */
-                err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-                                             vm, alloc_name,
-                                             small_vma_size,
-                                             large_vma_size,
-                                             big_page_size,
-                                             GPU_BALLOC_MAX_ORDER,
-                                             GPU_BALLOC_GVA_SPACE);
+                err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+                              alloc_name,
+                              start,                  /* start */
+                              num_large_pages);       /* length */
                 if (err)
                         goto clean_up_small_allocator;
         }
@@ -2524,9 +2602,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                          struct nvgpu_as_alloc_space_args *args)
 
-{
-        int err = -ENOMEM;
+{ int err = -ENOMEM;
         int pgsz_idx = gmmu_page_size_small;
+        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct gk20a *g = vm->mm->g;
@@ -2557,19 +2635,21 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        vma = &vm->vma[pgsz_idx];
+        start_page_nr = 0;
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-                vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
-                                                 (u64)args->pages *
-                                                 (u64)args->page_size);
-        else
-                vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
+                start_page_nr = (u32)(args->o_a.offset >>
+                                      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
-        if (!vaddr_start) {
+        vma = &vm->vma[pgsz_idx];
+        err = vma->alloc(vma, &start_page_nr, args->pages, 1);
+        if (err) {
                 kfree(va_node);
                 goto clean_up;
         }
 
+        vaddr_start = (u64)start_page_nr <<
+                      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
+
         va_node->vaddr_start = vaddr_start;
         va_node->size = (u64)args->page_size * (u64)args->pages;
         va_node->pgsz_idx = pgsz_idx;
@@ -2593,7 +2673,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                                          true);
         if (!map_offset) {
                 mutex_unlock(&vm->update_gmmu_lock);
-                gk20a_bfree(vma, vaddr_start);
+                vma->free(vma, start_page_nr, args->pages, 1);
                 kfree(va_node);
                 goto clean_up;
         }
@@ -2605,7 +2685,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
         mutex_unlock(&vm->update_gmmu_lock);
 
         args->o_a.offset = vaddr_start;
-        err = 0;
 
 clean_up:
         return err;
@@ -2616,6 +2695,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
         int err = -ENOMEM;
         int pgsz_idx;
+        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct vm_reserved_va_node *va_node;
@@ -2628,8 +2708,14 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
         pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
                         gmmu_page_size_big : gmmu_page_size_small;
 
+        start_page_nr = (u32)(args->offset >>
+                        ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+
         vma = &vm->vma[pgsz_idx];
-        gk20a_bfree(vma, args->offset);
+        err = vma->free(vma, start_page_nr, args->pages, 1);
+
+        if (err)
+                goto clean_up;
 
         mutex_lock(&vm->update_gmmu_lock);
         va_node = addr_to_reservation(vm, args->offset);
@@ -2659,8 +2745,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                 kfree(va_node);
         }
         mutex_unlock(&vm->update_gmmu_lock);
-        err = 0;
 
+clean_up:
         return err;
 }
 