Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 194
1 file changed, 54 insertions(+), 140 deletions(-)
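The whole patch is one theme: the old line/page-counting allocator interface (alloc() fills in an offset via an out-parameter and returns a status; free() needs the base, the length, and an alignment argument) is replaced by the buddy-style gk20a_balloc()/gk20a_bfree() interface, which returns the allocated address directly, uses 0 to signal failure, and frees by address alone. A minimal user-space sketch of the two calling conventions, using a hypothetical bump allocator (toy_allocator, toy_alloc, toy_balloc are illustrative names, not the real nvgpu structures):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a GPU VA allocator; not the nvgpu structure. */
struct toy_allocator {
	uint64_t next;
	uint64_t limit;
};

/* Old-style convention: status return, result through an out-parameter,
 * caller passes a length (and, in nvgpu, an alignment argument as well). */
static int toy_alloc(struct toy_allocator *a, uint64_t *offset, uint64_t len)
{
	if (a->next + len > a->limit)
		return -1;
	*offset = a->next;
	a->next += len;
	return 0;
}

/* New-style convention mirrored from gk20a_balloc(): the address is the
 * return value and 0 means failure, so no out-parameter is needed. */
static uint64_t toy_balloc(struct toy_allocator *a, uint64_t len)
{
	uint64_t offset;

	if (toy_alloc(a, &offset, len))
		return 0;
	return offset;
}

int main(void)
{
	struct toy_allocator a = { .next = 0x1000, .limit = 0x10000 };
	uint64_t off1 = 0, off2;

	assert(toy_alloc(&a, &off1, 0x2000) == 0);
	off2 = toy_balloc(&a, 0x2000);
	assert(off2 != 0);
	printf("old-style offset 0x%llx, new-style offset 0x%llx\n",
	       (unsigned long long)off1, (unsigned long long)off2);
	return 0;
}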
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..c11414b5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv)
 
         if (priv->comptags.lines) {
                 BUG_ON(!priv->comptag_allocator);
-                priv->comptag_allocator->free(priv->comptag_allocator,
-                                              priv->comptags.offset,
-                                              priv->comptags.allocated_lines,
-                                              1);
+                gk20a_bfree(priv->comptag_allocator,
+                            priv->comptags.real_offset);
         }
 
         /* Free buffer states */
@@ -224,10 +222,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
                                    u32 lines, bool user_mappable)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-        u32 offset = 0;
-        int err;
         u32 ctaglines_to_allocate;
-        u32 ctagline_align;
+        u32 ctagline_align = 1;
+        u32 offset;
         const u32 aggregate_cacheline_sz =
                 g->gr.cacheline_size * g->gr.slices_per_ltc *
                 g->ltc_count;
@@ -241,7 +238,6 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
         if (!user_mappable) {
                 ctaglines_to_allocate = lines;
-                ctagline_align = 1;
         } else {
                 /* Unfortunately, we cannot use allocation alignment
                  * here, since compbits per cacheline is not always a
@@ -273,71 +269,25 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
                 if (ctaglines_to_allocate < lines)
                         return -EINVAL; /* integer overflow */
+                pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
         }
 
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        err = allocator->alloc(allocator, &offset,
-                               ctaglines_to_allocate, 1);
-        if (!err) {
-                const u32 alignment_lines =
-                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
-                        offset;
-
-                /* prune the preceding ctaglines that were allocated
-                   for alignment */
-                if (alignment_lines) {
-                        /* free alignment lines */
-                        int tmp=
-                                allocator->free(allocator, offset,
-                                                alignment_lines,
-                                                1);
-                        WARN_ON(tmp);
-
-                        offset += alignment_lines;
-                        ctaglines_to_allocate -= alignment_lines;
-                }
-
-                /* check if we can prune the trailing, too */
-                if (user_mappable)
-                {
-                        u32 needed_cachelines =
-                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-
-                        u32 first_unneeded_cacheline =
-                                DIV_ROUND_UP(round_up(needed_cachelines *
-                                                      aggregate_cacheline_sz,
-                                                      small_pgsz),
-                                             aggregate_cacheline_sz);
-                        u32 needed_ctaglines =
-                                first_unneeded_cacheline *
-                                g->gr.comptags_per_cacheline;
-
-                        if (needed_ctaglines < ctaglines_to_allocate) {
-                                /* free alignment lines */
-                                int tmp=
-                                        allocator->free(
-                                                allocator,
-                                                offset + needed_ctaglines,
-                                                (ctaglines_to_allocate -
-                                                 needed_ctaglines),
-                                                1);
-                                WARN_ON(tmp);
-
-                                ctaglines_to_allocate = needed_ctaglines;
-                        }
-                }
-
-                priv->comptags.offset = offset;
-                priv->comptags.lines = lines;
-                priv->comptags.allocated_lines = ctaglines_to_allocate;
-                priv->comptags.user_mappable = user_mappable;
-        }
-        return err;
-}
+        offset = gk20a_balloc(allocator, ctaglines_to_allocate);
+        if (!offset)
+                return -ENOMEM;
+
+        priv->comptags.lines = lines;
+        priv->comptags.real_offset = offset;
 
+        if (user_mappable)
+                offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
 
+        priv->comptags.offset = offset;
 
+        return 0;
+}
 
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
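The rewritten gk20a_alloc_comptags() no longer over-allocates and then prunes leading and trailing lines; it records the raw allocation in comptags.real_offset (used later by gk20a_bfree()) and simply rounds the user-visible offset up to ctagline_align with DIV_ROUND_UP(offset, align) * align, which works even when the alignment is not a power of two (the surviving comment notes that compbits per cacheline need not be one). A self-contained sketch of that round-up, with the kernel macro re-declared locally so it builds outside the kernel tree:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same definition as the kernel's DIV_ROUND_UP(), repeated here so the
 * sketch compiles in user space. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Round offset up to the next multiple of align; align does not have to
 * be a power of two, which is why a mask-based round-up is not enough. */
static uint32_t roundup_to_align(uint32_t offset, uint32_t align)
{
	return DIV_ROUND_UP(offset, align) * align;
}

int main(void)
{
	/* 3 is deliberately not a power of two. */
	assert(roundup_to_align(7, 3) == 9);
	assert(roundup_to_align(9, 3) == 9);
	assert(roundup_to_align(0, 3) == 0);
	printf("7 aligned to 3 -> %u\n", roundup_to_align(7, 3));
	return 0;
}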
@@ -889,14 +839,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                       u64 size,
                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
-        int err;
         u64 offset;
-        u32 start_page_nr = 0, num_pages;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
         if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -912,28 +860,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
         }
 
-        /* be certain we round up to gmmu_page_size if needed */
-        /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
+        /* Be certain we round up to gmmu_page_size if needed */
         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-
         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
                         vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-        /* The vma allocator represents page accounting. */
-        num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
-
-        err = vma->alloc(vma, &start_page_nr, num_pages, 1);
-
-        if (err) {
+        offset = gk20a_balloc(vma, size);
+        if (!offset) {
                 gk20a_err(dev_from_vm(vm),
                         "%s oom: sz=0x%llx", vma->name, size);
                 return 0;
         }
 
-        offset = (u64)start_page_nr <<
-                        ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-
         return offset;
 }
 
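The size round-up kept in gk20a_vm_alloc_va() uses the mask form (size + pgsz - 1) & ~(pgsz - 1) rather than DIV_ROUND_UP; the TBD comment deleted above recorded why: a 64-bit division would drag in __aeabi_uldivmod on 32-bit ARM builds. The mask form is only valid for power-of-two sizes, which GMMU page sizes are. A small sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Round size up to a power-of-two page size without any 64-bit division,
 * mirroring the expression kept in gk20a_vm_alloc_va(). */
static uint64_t round_up_pow2(uint64_t size, uint64_t pgsz)
{
	/* Only valid when pgsz is a power of two. */
	assert(pgsz && (pgsz & (pgsz - 1)) == 0);
	return (size + (pgsz - 1)) & ~(pgsz - 1);
}

int main(void)
{
	assert(round_up_pow2(1, 4096) == 4096);
	assert(round_up_pow2(4096, 4096) == 4096);
	assert(round_up_pow2(4097, 131072) == 131072);  /* 128KB big page */
	printf("0x%llx\n", (unsigned long long)round_up_pow2(4097, 4096));
	return 0;
}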
@@ -942,25 +881,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                       enum gmmu_pgsz_gk20a pgsz_idx)
 {
         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
-        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-        u32 page_shift = ilog2(page_size);
-        u32 start_page_nr, num_pages;
-        int err;
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                         vma->name, offset, size);
+        gk20a_bfree(vma, offset);
 
-        start_page_nr = (u32)(offset >> page_shift);
-        num_pages = (u32)((size + page_size - 1) >> page_shift);
-
-        err = vma->free(vma, start_page_nr, num_pages, 1);
-        if (err) {
-                gk20a_err(dev_from_vm(vm),
-                        "not found: offset=0x%llx, sz=0x%llx",
-                        offset, size);
-        }
-
-        return err;
+        return 0;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
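The deleted body of gk20a_vm_free_va() shows how the old interface pushed page accounting onto every caller: the offset had to be converted back into a start page number and the size into a page count before free() could be called. With gk20a_bfree() the caller passes only the address (the buddy allocator presumably tracks block sizes itself). The old caller-side math, as a standalone sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The conversion the removed code performed before the old free() call:
 * turn a GPU VA and byte size back into (start page, page count). */
static void va_to_pages(uint64_t offset, uint64_t size, uint32_t page_shift,
			uint32_t *start_page_nr, uint32_t *num_pages)
{
	uint64_t page_size = 1ull << page_shift;

	*start_page_nr = (uint32_t)(offset >> page_shift);
	*num_pages = (uint32_t)((size + page_size - 1) >> page_shift);
}

int main(void)
{
	uint32_t start, pages;

	va_to_pages(0x100000, 0x5000, 12, &start, &pages); /* 4KB pages */
	assert(start == 0x100 && pages == 5);
	printf("start page %u, %u pages\n", start, pages);
	return 0;
}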
@@ -1136,7 +1062,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
         if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-                          map_offset);
+                        map_offset);
                 return -EINVAL;
         }
 
@@ -2433,7 +2359,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                   char *name)
 {
         int err, i;
-        u32 num_small_pages, num_large_pages, low_hole_pages;
         char alloc_name[32];
         u64 small_vma_size, large_vma_size;
         u32 pde_lo, pde_hi;
@@ -2494,34 +2419,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 large_vma_size = vm->va_limit - small_vma_size;
         }
 
-        num_small_pages = (u32)(small_vma_size >>
-                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
-        /* num_pages above is without regard to the low-side hole. */
-        low_hole_pages = (vm->va_start >>
-                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
         snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
                  vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-        err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-                                   alloc_name,
-                                   low_hole_pages,                  /*start*/
-                                   num_small_pages - low_hole_pages);/* length*/
+        err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+                                     vm, alloc_name,
+                                     vm->va_start,
+                                     small_vma_size - vm->va_start,
+                                     SZ_4K,
+                                     GPU_BALLOC_MAX_ORDER,
+                                     GPU_BALLOC_GVA_SPACE);
         if (err)
                 goto clean_up_ptes;
 
         if (big_pages) {
-                u32 start = (u32)(small_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-                num_large_pages = (u32)(large_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-
                 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
                          name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-                err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-                                           alloc_name,
-                                           start,           /* start */
-                                           num_large_pages); /* length */
+                /*
+                 * Big page VMA starts at the end of the small page VMA.
+                 */
+                err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+                                             vm, alloc_name,
+                                             small_vma_size,
+                                             large_vma_size,
+                                             big_page_size,
+                                             GPU_BALLOC_MAX_ORDER,
+                                             GPU_BALLOC_GVA_SPACE);
                 if (err)
                         goto clean_up_small_allocator;
         }
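With the new initializer the VMA ranges are described in bytes rather than page counts: the small-page allocator covers [va_start, small_vma_size) and, when big pages are enabled, the big-page allocator covers [small_vma_size, va_limit), i.e. large_vma_size bytes starting where the small VMA ends. A sketch of that split with made-up numbers (the SZ_4K block size, GPU_BALLOC_MAX_ORDER and GPU_BALLOC_GVA_SPACE arguments are taken as given from the diff and not modelled here):

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical numbers, only to show how the two ranges tile the space. */
int main(void)
{
	uint64_t va_start = 0x100000;          /* low hole below this   */
	uint64_t va_limit = 1ull << 37;        /* end of the VA space   */
	uint64_t small_vma_size = 1ull << 36;  /* first part: 4KB pages */
	uint64_t large_vma_size = va_limit - small_vma_size;

	/* Small-page VMA: base va_start, length small_vma_size - va_start. */
	uint64_t small_base = va_start;
	uint64_t small_len = small_vma_size - va_start;

	/* Big-page VMA starts where the small one ends. */
	uint64_t big_base = small_vma_size;
	uint64_t big_len = large_vma_size;

	assert(small_base + small_len == big_base);
	assert(big_base + big_len == va_limit);
	printf("small: [0x%" PRIx64 ", 0x%" PRIx64 ")\n",
	       small_base, small_base + small_len);
	printf("big:   [0x%" PRIx64 ", 0x%" PRIx64 ")\n",
	       big_base, big_base + big_len);
	return 0;
}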
@@ -2602,9 +2524,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                          struct nvgpu_as_alloc_space_args *args)
 
-{ int err = -ENOMEM;
+{
+        int err = -ENOMEM;
         int pgsz_idx = gmmu_page_size_small;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct gk20a *g = vm->mm->g;
@@ -2635,21 +2557,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        start_page_nr = 0;
+        vma = &vm->vma[pgsz_idx];
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-                start_page_nr = (u32)(args->o_a.offset >>
-                                ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+                vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
+                                                 (u64)args->pages *
+                                                 (u64)args->page_size);
+        else
+                vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
 
-        vma = &vm->vma[pgsz_idx];
-        err = vma->alloc(vma, &start_page_nr, args->pages, 1);
-        if (err) {
+        if (!vaddr_start) {
                 kfree(va_node);
                 goto clean_up;
         }
 
-        vaddr_start = (u64)start_page_nr <<
-                        ilog2(vm->gmmu_page_sizes[pgsz_idx]);
-
         va_node->vaddr_start = vaddr_start;
         va_node->size = (u64)args->page_size * (u64)args->pages;
         va_node->pgsz_idx = pgsz_idx;
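Note the (u64) casts on the fixed-offset path: args->pages and args->page_size appear to be 32-bit (hence the casts), so the byte size must be widened before the multiply or a large request would wrap. A sketch of the difference (types chosen to mirror that u32/u64 split; the field names come from the diff):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pages = 1u << 21;      /* 2M pages        */
	uint32_t page_size = 1u << 17;  /* 128KB big pages */

	/* Wrong: the multiply happens in 32 bits and wraps to 0. */
	uint64_t wrapped = (uint64_t)(pages * page_size);

	/* Right: widen first, as the patched code does. */
	uint64_t widened = (uint64_t)pages * (uint64_t)page_size;

	printf("32-bit multiply: 0x%" PRIx64 "\n", wrapped);  /* 0x0          */
	printf("64-bit multiply: 0x%" PRIx64 "\n", widened);  /* 0x4000000000 */
	return 0;
}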
@@ -2673,7 +2593,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                                          true);
                 if (!map_offset) {
                         mutex_unlock(&vm->update_gmmu_lock);
-                        vma->free(vma, start_page_nr, args->pages, 1);
+                        gk20a_bfree(vma, vaddr_start);
                         kfree(va_node);
                         goto clean_up;
                 }
@@ -2685,6 +2605,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
         mutex_unlock(&vm->update_gmmu_lock);
 
         args->o_a.offset = vaddr_start;
+        err = 0;
 
 clean_up:
         return err;
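gk20a_vm_alloc_space() keeps the common kernel goto-cleanup shape: err starts out as -ENOMEM, every failure jumps to clean_up with err still set, and err = 0 is now assigned explicitly on the success path because no allocator call fills it in anymore. The pattern in isolation, as a minimal sketch (do_setup is an illustrative name, not nvgpu code):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal goto-cleanup skeleton: err holds the failure code by default and
 * is only cleared once every step has succeeded. */
static int do_setup(int fail_step)
{
	int err = -ENOMEM;
	char *buf = NULL;

	if (fail_step == 1)
		goto clean_up;

	buf = malloc(64);
	if (!buf)
		goto clean_up;

	/* ... more steps that each 'goto clean_up' on failure ... */

	err = 0;  /* success is recorded explicitly, as in the patch */
clean_up:
	free(buf);
	return err;
}

int main(void)
{
	printf("fail: %d, ok: %d\n", do_setup(1), do_setup(0));
	return 0;
}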
@@ -2695,7 +2616,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
         int err = -ENOMEM;
         int pgsz_idx;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct vm_reserved_va_node *va_node;
@@ -2708,14 +2628,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
         pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
                         gmmu_page_size_big : gmmu_page_size_small;
 
-        start_page_nr = (u32)(args->offset >>
-                        ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
         vma = &vm->vma[pgsz_idx];
-        err = vma->free(vma, start_page_nr, args->pages, 1);
-
-        if (err)
-                goto clean_up;
+        gk20a_bfree(vma, args->offset);
 
         mutex_lock(&vm->update_gmmu_lock);
         va_node = addr_to_reservation(vm, args->offset);
@@ -2745,8 +2659,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                 kfree(va_node);
         }
         mutex_unlock(&vm->update_gmmu_lock);
+        err = 0;
 
-clean_up:
         return err;
 }
 