Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--    drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 194
1 file changed, 54 insertions(+), 140 deletions(-)
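Note: the changes below migrate mm_gk20a.c from the old per-allocator function-pointer interface, which dealt in page numbers and explicit lengths, to the byte-addressed buddy-allocator calls gk20a_balloc()/gk20a_balloc_fixed()/gk20a_bfree(). The contrast, using only calls that appear in this diff:

    /* Old interface: page-number based, explicit length on both sides. */
    err = vma->alloc(vma, &start_page_nr, num_pages, 1);
    vma->free(vma, start_page_nr, num_pages, 1);

    /* New interface: byte/VA based; the allocator tracks block sizes, so
     * free takes only the base address. A return of 0 means failure. */
    offset = gk20a_balloc(vma, size);
    vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, size);
    gk20a_bfree(vma, offset);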
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..c11414b5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	if (priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
-		priv->comptag_allocator->free(priv->comptag_allocator,
-					      priv->comptags.offset,
-					      priv->comptags.allocated_lines,
-					      1);
+		gk20a_bfree(priv->comptag_allocator,
+			    priv->comptags.real_offset);
 	}
 
 	/* Free buffer states */
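Note: gk20a_bfree() takes only the allocator and the block's base address; the old (offset, length, align) triple is gone because the allocator now remembers the size of each outstanding block. A minimal sketch of how such free-by-address bookkeeping can work; all names and structure here are hypothetical, not the actual nvgpu internals:

    #include <linux/rbtree.h>
    #include <linux/kernel.h>

    struct sketch_block {
    	u64 base;                    /* start of the block */
    	u64 order;                   /* size = blk_size << order */
    	struct rb_node entry;        /* indexed by base */
    };

    struct sketch_allocator {
    	struct rb_root alloced_blocks; /* live blocks keyed by base */
    	u64 blk_size;
    };

    /* sketch_find_block() and sketch_coalesce() are hypothetical helpers. */
    static void sketch_bfree(struct sketch_allocator *a, u64 base)
    {
    	struct sketch_block *b = sketch_find_block(a, base);

    	if (WARN_ON(!b))             /* bad address or double free */
    		return;
    	rb_erase(&b->entry, &a->alloced_blocks);
    	sketch_coalesce(a, b);       /* merge with free buddy blocks */
    }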
@@ -224,10 +222,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       u32 lines, bool user_mappable)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-	u32 offset = 0;
-	int err;
 	u32 ctaglines_to_allocate;
-	u32 ctagline_align;
+	u32 ctagline_align = 1;
+	u32 offset;
 	const u32 aggregate_cacheline_sz =
 		g->gr.cacheline_size * g->gr.slices_per_ltc *
 		g->ltc_count;
@@ -241,7 +238,6 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 	if (!user_mappable) {
 		ctaglines_to_allocate = lines;
-		ctagline_align = 1;
 	} else {
 		/* Unfortunately, we cannot use allocation alignment
 		 * here, since compbits per cacheline is not always a
@@ -273,71 +269,25 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 		if (ctaglines_to_allocate < lines)
 			return -EINVAL; /* integer overflow */
+		pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
 	}
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	err = allocator->alloc(allocator, &offset,
-			       ctaglines_to_allocate, 1);
-	if (!err) {
-		const u32 alignment_lines =
-			DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
-			offset;
-
-		/* prune the preceding ctaglines that were allocated
-		   for alignment */
-		if (alignment_lines) {
-			/* free alignment lines */
-			int tmp=
-				allocator->free(allocator, offset,
-						alignment_lines,
-						1);
-			WARN_ON(tmp);
-
-			offset += alignment_lines;
-			ctaglines_to_allocate -= alignment_lines;
-		}
+	offset = gk20a_balloc(allocator, ctaglines_to_allocate);
+	if (!offset)
+		return -ENOMEM;
 
-		/* check if we can prune the trailing, too */
-		if (user_mappable)
-		{
-			u32 needed_cachelines =
-				DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-
-			u32 first_unneeded_cacheline =
-				DIV_ROUND_UP(round_up(needed_cachelines *
-						      aggregate_cacheline_sz,
-						      small_pgsz),
-					     aggregate_cacheline_sz);
-			u32 needed_ctaglines =
-				first_unneeded_cacheline *
-				g->gr.comptags_per_cacheline;
-
-			if (needed_ctaglines < ctaglines_to_allocate) {
-				/* free alignment lines */
-				int tmp=
-					allocator->free(
-						allocator,
-						offset + needed_ctaglines,
-						(ctaglines_to_allocate -
-						 needed_ctaglines),
-						1);
-				WARN_ON(tmp);
-
-				ctaglines_to_allocate = needed_ctaglines;
-			}
-		}
-
-		priv->comptags.offset = offset;
-		priv->comptags.lines = lines;
-		priv->comptags.allocated_lines = ctaglines_to_allocate;
-		priv->comptags.user_mappable = user_mappable;
-	}
-	return err;
-}
+	priv->comptags.lines = lines;
+	priv->comptags.real_offset = offset;
 
+	if (user_mappable)
+		offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
 
+	priv->comptags.offset = offset;
 
+	return 0;
+}
 
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
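Note: the rewritten gk20a_alloc_comptags() keeps two offsets. real_offset is whatever gk20a_balloc() returned, and is what gk20a_mm_delete_priv() later hands back to gk20a_bfree(); comptags.offset is that value rounded up to ctagline_align for user-mappable buffers. The over-allocation computed earlier in the function guarantees the aligned window still lies inside the block, so the old pre/post pruning frees are no longer needed. A worked example with hypothetical numbers:

    /* Hypothetical numbers: align after allocation instead of pruning. */
    u32 real_offset = 300;        /* returned by gk20a_balloc() */
    u32 align = 128;              /* ctagline_align */
    u32 offset = DIV_ROUND_UP(real_offset, align) * align;   /* 384 */
    /* The 384 - 300 = 84 lines of slack are covered by the earlier
     * over-allocation; the whole block is later released by a single
     * gk20a_bfree(allocator, real_offset). */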
@@ -889,14 +839,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		      u64 size,
 		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
 	struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
-	int err;
 	u64 offset;
-	u32 start_page_nr = 0, num_pages;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
 	if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -912,28 +860,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
 	}
 
-	/* be certain we round up to gmmu_page_size if needed */
-	/* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
+	/* Be certain we round up to gmmu_page_size if needed */
 	size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-
 	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
 			vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-	/* The vma allocator represents page accounting. */
-	num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
-
-	err = vma->alloc(vma, &start_page_nr, num_pages, 1);
-
-	if (err) {
+	offset = gk20a_balloc(vma, size);
+	if (!offset) {
 		gk20a_err(dev_from_vm(vm),
 			"%s oom: sz=0x%llx", vma->name, size);
 		return 0;
 	}
 
-	offset = (u64)start_page_nr <<
-		 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 	gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-
 	return offset;
 }
 
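Note: gk20a_vm_alloc_va() now allocates GPU VA in bytes: gk20a_balloc(vma, size) returns the address directly (0 on failure), so the size-to-num_pages and start_page_nr-to-offset shifts disappear. The surviving mask-based round-up also explains the deleted TBD comment: a u64 DIV_ROUND_UP would emit a 64-bit division, which on 32-bit ARM becomes a call to the libgcc helper __aeabi_uldivmod that the kernel does not provide. The mask trick is safe here because GMMU page sizes are powers of two:

    /* Round a u64 size up to a power-of-two boundary without a 64-bit
     * divide. Valid only when pgsz is a power of two. */
    static inline u64 round_up_pow2(u64 size, u64 pgsz)
    {
    	return (size + pgsz - 1) & ~(pgsz - 1);
    }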
@@ -942,25 +881,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
-	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-	u32 page_shift = ilog2(page_size);
-	u32 start_page_nr, num_pages;
-	int err;
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 			vma->name, offset, size);
+	gk20a_bfree(vma, offset);
 
-	start_page_nr = (u32)(offset >> page_shift);
-	num_pages = (u32)((size + page_size - 1) >> page_shift);
-
-	err = vma->free(vma, start_page_nr, num_pages, 1);
-	if (err) {
-		gk20a_err(dev_from_vm(vm),
-			"not found: offset=0x%llx, sz=0x%llx",
-			offset, size);
-	}
-
-	return err;
+	return 0;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
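Note: with sizes tracked inside the allocator, gk20a_vm_free_va() can no longer fail, so the old "not found" error path is gone and the size argument survives only for the debug print. One contract point worth keeping in mind: a free-by-address interface presumably must be handed the exact base that the matching balloc returned, since an interior address would not match a tracked block. A hypothetical misuse, for illustration only:

    u64 va = gk20a_balloc(vma, SZ_128K);
    gk20a_bfree(vma, va + SZ_4K);   /* wrong: not an allocation base */
    gk20a_bfree(vma, va);           /* correct */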
@@ -1136,7 +1062,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
 	if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
 		gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-			map_offset);
+			  map_offset);
 		return -EINVAL;
 	}
 
@@ -2433,7 +2359,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		  char *name)
 {
 	int err, i;
-	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
 	u64 small_vma_size, large_vma_size;
 	u32 pde_lo, pde_hi;
@@ -2494,34 +2419,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		large_vma_size = vm->va_limit - small_vma_size;
 	}
 
-	num_small_pages = (u32)(small_vma_size >>
-		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
-	/* num_pages above is without regard to the low-side hole. */
-	low_hole_pages = (vm->va_start >>
-			  ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
 	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				   alloc_name,
-				   low_hole_pages, /*start*/
-				   num_small_pages - low_hole_pages);/* length*/
+	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+				     vm, alloc_name,
+				     vm->va_start,
+				     small_vma_size - vm->va_start,
+				     SZ_4K,
+				     GPU_BALLOC_MAX_ORDER,
+				     GPU_BALLOC_GVA_SPACE);
 	if (err)
 		goto clean_up_ptes;
 
 	if (big_pages) {
-		u32 start = (u32)(small_vma_size >>
-			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-		num_large_pages = (u32)(large_vma_size >>
-			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-					   alloc_name,
-					   start, /* start */
-					   num_large_pages); /* length */
+		/*
+		 * Big page VMA starts at the end of the small page VMA.
+		 */
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+					     vm, alloc_name,
+					     small_vma_size,
+					     large_vma_size,
+					     big_page_size,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_small_allocator;
 	}
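Note: the VMA allocators are now initialized with byte ranges instead of page counts: the small-page VMA covers [vm->va_start, small_vma_size) at 4 KiB granularity, and the big-page VMA covers the following large_vma_size bytes at big_page_size granularity. From these two call sites the initializer's signature appears to be the following; the argument names are inferred from usage, not taken from the header:

    int __gk20a_allocator_init(struct gk20a_allocator *a,
                               struct vm_gk20a *vm, const char *name,
                               u64 base,      /* first managed GPU VA, in bytes */
                               u64 length,    /* size of the managed range */
                               u64 blk_size,  /* smallest allocatable granule */
                               u64 max_order, /* cap on the buddy order */
                               u64 flags);    /* e.g. GPU_BALLOC_GVA_SPACE */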
@@ -2602,9 +2524,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args)
 
-{ int err = -ENOMEM;
+{
+	int err = -ENOMEM;
 	int pgsz_idx = gmmu_page_size_small;
-	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct gk20a *g = vm->mm->g;
@@ -2635,21 +2557,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	start_page_nr = 0;
+	vma = &vm->vma[pgsz_idx];
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-		start_page_nr = (u32)(args->o_a.offset >>
-			      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
+						 (u64)args->pages *
+						 (u64)args->page_size);
+	else
+		vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
 
-	vma = &vm->vma[pgsz_idx];
-	err = vma->alloc(vma, &start_page_nr, args->pages, 1);
-	if (err) {
+	if (!vaddr_start) {
 		kfree(va_node);
 		goto clean_up;
 	}
 
-	vaddr_start = (u64)start_page_nr <<
-		      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
-
 	va_node->vaddr_start = vaddr_start;
 	va_node->size = (u64)args->page_size * (u64)args->pages;
 	va_node->pgsz_idx = pgsz_idx;
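Note: fixed-offset reservations go through gk20a_balloc_fixed(vma, base, size); everything else goes through gk20a_balloc(). The (u64) casts on the fixed path matter: assuming args->pages and args->page_size are 32-bit ioctl fields, their product wraps before being widened:

    u32 pages = 1 << 20, page_size = SZ_4K;   /* a 4 GiB request */
    u64 bad  = pages * page_size;             /* u32 multiply wraps to 0 */
    u64 good = (u64)pages * (u64)page_size;   /* 0x100000000 */

Under the same assumption, the plain gk20a_balloc(vma, args->pages * args->page_size) call on the non-fixed path does not widen first and so inherits that wrap for pathological sizes.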
@@ -2673,7 +2593,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 				    true);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
-		vma->free(vma, start_page_nr, args->pages, 1);
+		gk20a_bfree(vma, vaddr_start);
 		kfree(va_node);
 		goto clean_up;
 	}
@@ -2685,6 +2605,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	args->o_a.offset = vaddr_start;
+	err = 0;
 
 clean_up:
 	return err;
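Note: err is initialized to -ENOMEM and every failure jumps to clean_up, so the added err = 0 is what makes the success path actually report success. The control-flow shape, reduced to a self-contained sketch (the wrapper function is hypothetical):

    static int alloc_space_shape(struct gk20a_allocator *vma, u64 size)
    {
    	int err = -ENOMEM;               /* pessimistic default */
    	u64 va = gk20a_balloc(vma, size);

    	if (!va)
    		goto clean_up;           /* err is still -ENOMEM */

    	err = 0;                         /* only full success reaches here */
    clean_up:
    	return err;
    }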
@@ -2695,7 +2616,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
 	int err = -ENOMEM;
 	int pgsz_idx;
-	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
@@ -2708,14 +2628,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
-	start_page_nr = (u32)(args->offset >>
-			ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
 	vma = &vm->vma[pgsz_idx];
-	err = vma->free(vma, start_page_nr, args->pages, 1);
-
-	if (err)
-		goto clean_up;
+	gk20a_bfree(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
 	va_node = addr_to_reservation(vm, args->offset);
@@ -2745,8 +2659,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
+	err = 0;
 
-clean_up:
 	return err;
 }
 