diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 202 |
1 files changed, 149 insertions, 53 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a38db709..735c262a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,8 +132,10 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
132 | 132 | ||
133 | if (priv->comptags.lines) { | 133 | if (priv->comptags.lines) { |
134 | BUG_ON(!priv->comptag_allocator); | 134 | BUG_ON(!priv->comptag_allocator); |
135 | gk20a_bfree(priv->comptag_allocator, | 135 | priv->comptag_allocator->free(priv->comptag_allocator, |
136 | priv->comptags.real_offset); | 136 | priv->comptags.offset, |
137 | priv->comptags.allocated_lines, | ||
138 | 1); | ||
137 | } | 139 | } |
138 | 140 | ||
139 | /* Free buffer states */ | 141 | /* Free buffer states */ |
@@ -224,9 +226,10 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
224 | u32 *ctag_map_win_ctagline) | 226 | u32 *ctag_map_win_ctagline) |
225 | { | 227 | { |
226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 228 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
229 | u32 offset = 0; | ||
230 | int err; | ||
227 | u32 ctaglines_to_allocate; | 231 | u32 ctaglines_to_allocate; |
228 | u32 ctagline_align = 1; | 232 | u32 ctagline_align; |
229 | u32 offset; | ||
230 | const u32 aggregate_cacheline_sz = | 233 | const u32 aggregate_cacheline_sz = |
231 | g->gr.cacheline_size * g->gr.slices_per_ltc * | 234 | g->gr.cacheline_size * g->gr.slices_per_ltc * |
232 | g->ltc_count; | 235 | g->ltc_count; |
@@ -240,6 +243,7 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
240 | 243 | ||
241 | if (!user_mappable) { | 244 | if (!user_mappable) { |
242 | ctaglines_to_allocate = lines; | 245 | ctaglines_to_allocate = lines; |
246 | ctagline_align = 1; | ||
243 | } else { | 247 | } else { |
244 | /* Unfortunately, we cannot use allocation alignment | 248 | /* Unfortunately, we cannot use allocation alignment |
245 | * here, since compbits per cacheline is not always a | 249 | * here, since compbits per cacheline is not always a |
@@ -271,26 +275,82 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
271 | 275 | ||
272 | if (ctaglines_to_allocate < lines) | 276 | if (ctaglines_to_allocate < lines) |
273 | return -EINVAL; /* integer overflow */ | 277 | return -EINVAL; /* integer overflow */ |
274 | pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate); | ||
275 | } | 278 | } |
276 | 279 | ||
277 | /* store the allocator so we can use it when we free the ctags */ | 280 | /* store the allocator so we can use it when we free the ctags */ |
278 | priv->comptag_allocator = allocator; | 281 | priv->comptag_allocator = allocator; |
279 | offset = gk20a_balloc(allocator, ctaglines_to_allocate); | 282 | err = allocator->alloc(allocator, &offset, |
280 | if (!offset) | 283 | ctaglines_to_allocate, 1); |
281 | return -ENOMEM; | 284 | if (!err) { |
285 | const u32 alignment_lines = | ||
286 | DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - | ||
287 | offset; | ||
288 | |||
289 | /* prune the preceding ctaglines that were allocated | ||
290 | for alignment */ | ||
291 | if (alignment_lines) { | ||
292 | /* free alignment lines */ | ||
293 | int tmp= | ||
294 | allocator->free(allocator, offset, | ||
295 | alignment_lines, | ||
296 | 1); | ||
297 | WARN_ON(tmp); | ||
298 | |||
299 | offset += alignment_lines; | ||
300 | ctaglines_to_allocate -= alignment_lines; | ||
301 | } | ||
282 | 302 | ||
283 | priv->comptags.lines = lines; | 303 | /* check if we can prune the trailing, too */ |
284 | priv->comptags.real_offset = offset; | 304 | if (user_mappable) |
305 | { | ||
306 | u32 needed_cachelines = | ||
307 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline); | ||
308 | |||
309 | u32 first_unneeded_cacheline = | ||
310 | DIV_ROUND_UP(round_up(needed_cachelines * | ||
311 | aggregate_cacheline_sz, | ||
312 | small_pgsz), | ||
313 | aggregate_cacheline_sz); | ||
314 | u32 needed_ctaglines = | ||
315 | first_unneeded_cacheline * | ||
316 | g->gr.comptags_per_cacheline; | ||
317 | |||
318 | u64 win_size; | ||
319 | |||
320 | if (needed_ctaglines < ctaglines_to_allocate) { | ||
321 | /* free alignment lines */ | ||
322 | int tmp= | ||
323 | allocator->free( | ||
324 | allocator, | ||
325 | offset + needed_ctaglines, | ||
326 | (ctaglines_to_allocate - | ||
327 | needed_ctaglines), | ||
328 | 1); | ||
329 | WARN_ON(tmp); | ||
330 | |||
331 | ctaglines_to_allocate = needed_ctaglines; | ||
332 | } | ||
285 | 333 | ||
286 | if (user_mappable) | 334 | *ctag_map_win_ctagline = offset; |
287 | offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align; | 335 | win_size = |
336 | DIV_ROUND_UP(lines, | ||
337 | g->gr.comptags_per_cacheline) * | ||
338 | aggregate_cacheline_sz; | ||
288 | 339 | ||
289 | priv->comptags.offset = offset; | 340 | *ctag_map_win_size = round_up(win_size, small_pgsz); |
341 | } | ||
290 | 342 | ||
291 | return 0; | 343 | priv->comptags.offset = offset; |
344 | priv->comptags.lines = lines; | ||
345 | priv->comptags.allocated_lines = ctaglines_to_allocate; | ||
346 | priv->comptags.user_mappable = user_mappable; | ||
347 | } | ||
348 | return err; | ||
292 | } | 349 | } |
293 | 350 | ||
351 | |||
352 | |||
353 | |||
294 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | 354 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) |
295 | { | 355 | { |
296 | gk20a_dbg_fn(""); | 356 | gk20a_dbg_fn(""); |
@@ -841,12 +901,14 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | |||
841 | } | 901 | } |
842 | 902 | ||
843 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | 903 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, |
844 | u64 size, | 904 | u64 size, |
845 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 905 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
846 | 906 | ||
847 | { | 907 | { |
848 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | 908 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; |
909 | int err; | ||
849 | u64 offset; | 910 | u64 offset; |
911 | u32 start_page_nr = 0, num_pages; | ||
850 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | 912 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; |
851 | 913 | ||
852 | if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { | 914 | if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { |
@@ -862,19 +924,28 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
862 | 924 | ||
863 | } | 925 | } |
864 | 926 | ||
865 | /* Be certain we round up to gmmu_page_size if needed */ | 927 | /* be certain we round up to gmmu_page_size if needed */ |
928 | /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ | ||
866 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); | 929 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); |
930 | |||
867 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, | 931 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, |
868 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); | 932 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); |
869 | 933 | ||
870 | offset = gk20a_balloc(vma, size); | 934 | /* The vma allocator represents page accounting. */ |
871 | if (!offset) { | 935 | num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); |
936 | |||
937 | err = vma->alloc(vma, &start_page_nr, num_pages, 1); | ||
938 | |||
939 | if (err) { | ||
872 | gk20a_err(dev_from_vm(vm), | 940 | gk20a_err(dev_from_vm(vm), |
873 | "%s oom: sz=0x%llx", vma->name, size); | 941 | "%s oom: sz=0x%llx", vma->name, size); |
874 | return 0; | 942 | return 0; |
875 | } | 943 | } |
876 | 944 | ||
945 | offset = (u64)start_page_nr << | ||
946 | ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); | ||
877 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); | 947 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); |
948 | |||
878 | return offset; | 949 | return offset; |
879 | } | 950 | } |
880 | 951 | ||
@@ -883,12 +954,25 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, | |||
883 | enum gmmu_pgsz_gk20a pgsz_idx) | 954 | enum gmmu_pgsz_gk20a pgsz_idx) |
884 | { | 955 | { |
885 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | 956 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; |
957 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
958 | u32 page_shift = ilog2(page_size); | ||
959 | u32 start_page_nr, num_pages; | ||
960 | int err; | ||
886 | 961 | ||
887 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | 962 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", |
888 | vma->name, offset, size); | 963 | vma->name, offset, size); |
889 | gk20a_bfree(vma, offset); | ||
890 | 964 | ||
891 | return 0; | 965 | start_page_nr = (u32)(offset >> page_shift); |
966 | num_pages = (u32)((size + page_size - 1) >> page_shift); | ||
967 | |||
968 | err = vma->free(vma, start_page_nr, num_pages, 1); | ||
969 | if (err) { | ||
970 | gk20a_err(dev_from_vm(vm), | ||
971 | "not found: offset=0x%llx, sz=0x%llx", | ||
972 | offset, size); | ||
973 | } | ||
974 | |||
975 | return err; | ||
892 | } | 976 | } |
893 | 977 | ||
894 | static int insert_mapped_buffer(struct rb_root *root, | 978 | static int insert_mapped_buffer(struct rb_root *root, |
@@ -1085,7 +1169,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, | |||
1085 | 1169 | ||
1086 | if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { | 1170 | if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { |
1087 | gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", | 1171 | gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", |
1088 | map_offset); | 1172 | map_offset); |
1089 | return -EINVAL; | 1173 | return -EINVAL; |
1090 | } | 1174 | } |
1091 | 1175 | ||
@@ -2529,6 +2613,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2529 | char *name) | 2613 | char *name) |
2530 | { | 2614 | { |
2531 | int err, i; | 2615 | int err, i; |
2616 | u32 num_small_pages, num_large_pages, low_hole_pages; | ||
2532 | char alloc_name[32]; | 2617 | char alloc_name[32]; |
2533 | u64 small_vma_size, large_vma_size; | 2618 | u64 small_vma_size, large_vma_size; |
2534 | u32 pde_lo, pde_hi; | 2619 | u32 pde_lo, pde_hi; |
@@ -2589,31 +2674,34 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2589 | large_vma_size = vm->va_limit - small_vma_size; | 2674 | large_vma_size = vm->va_limit - small_vma_size; |
2590 | } | 2675 | } |
2591 | 2676 | ||
2677 | num_small_pages = (u32)(small_vma_size >> | ||
2678 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
2679 | |||
2680 | /* num_pages above is without regard to the low-side hole. */ | ||
2681 | low_hole_pages = (vm->va_start >> | ||
2682 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
2683 | |||
2592 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2684 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, |
2593 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); | 2685 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); |
2594 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 2686 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], |
2595 | vm, alloc_name, | 2687 | alloc_name, |
2596 | vm->va_start, | 2688 | low_hole_pages, /*start*/ |
2597 | small_vma_size - vm->va_start, | 2689 | num_small_pages - low_hole_pages);/* length*/ |
2598 | SZ_4K, | ||
2599 | GPU_BALLOC_MAX_ORDER, | ||
2600 | GPU_BALLOC_GVA_SPACE); | ||
2601 | if (err) | 2690 | if (err) |
2602 | goto clean_up_ptes; | 2691 | goto clean_up_ptes; |
2603 | 2692 | ||
2604 | if (big_pages) { | 2693 | if (big_pages) { |
2694 | u32 start = (u32)(small_vma_size >> | ||
2695 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2696 | num_large_pages = (u32)(large_vma_size >> | ||
2697 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2698 | |||
2605 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2699 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2606 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2700 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); |
2607 | /* | 2701 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2608 | * Big page VMA starts at the end of the small page VMA. | 2702 | alloc_name, |
2609 | */ | 2703 | start, /* start */ |
2610 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2704 | num_large_pages); /* length */ |
2611 | vm, alloc_name, | ||
2612 | small_vma_size, | ||
2613 | large_vma_size, | ||
2614 | big_page_size, | ||
2615 | GPU_BALLOC_MAX_ORDER, | ||
2616 | GPU_BALLOC_GVA_SPACE); | ||
2617 | if (err) | 2705 | if (err) |
2618 | goto clean_up_small_allocator; | 2706 | goto clean_up_small_allocator; |
2619 | } | 2707 | } |
@@ -2694,9 +2782,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share) | |||
2694 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | 2782 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, |
2695 | struct nvgpu_as_alloc_space_args *args) | 2783 | struct nvgpu_as_alloc_space_args *args) |
2696 | 2784 | ||
2697 | { | 2785 | { int err = -ENOMEM; |
2698 | int err = -ENOMEM; | ||
2699 | int pgsz_idx = gmmu_page_size_small; | 2786 | int pgsz_idx = gmmu_page_size_small; |
2787 | u32 start_page_nr; | ||
2700 | struct gk20a_allocator *vma; | 2788 | struct gk20a_allocator *vma; |
2701 | struct vm_gk20a *vm = as_share->vm; | 2789 | struct vm_gk20a *vm = as_share->vm; |
2702 | struct gk20a *g = vm->mm->g; | 2790 | struct gk20a *g = vm->mm->g; |
@@ -2727,19 +2815,21 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2727 | goto clean_up; | 2815 | goto clean_up; |
2728 | } | 2816 | } |
2729 | 2817 | ||
2730 | vma = &vm->vma[pgsz_idx]; | 2818 | start_page_nr = 0; |
2731 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | 2819 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) |
2732 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, | 2820 | start_page_nr = (u32)(args->o_a.offset >> |
2733 | (u64)args->pages * | 2821 | ilog2(vm->gmmu_page_sizes[pgsz_idx])); |
2734 | (u64)args->page_size); | ||
2735 | else | ||
2736 | vaddr_start = gk20a_balloc(vma, args->pages * args->page_size); | ||
2737 | 2822 | ||
2738 | if (!vaddr_start) { | 2823 | vma = &vm->vma[pgsz_idx]; |
2824 | err = vma->alloc(vma, &start_page_nr, args->pages, 1); | ||
2825 | if (err) { | ||
2739 | kfree(va_node); | 2826 | kfree(va_node); |
2740 | goto clean_up; | 2827 | goto clean_up; |
2741 | } | 2828 | } |
2742 | 2829 | ||
2830 | vaddr_start = (u64)start_page_nr << | ||
2831 | ilog2(vm->gmmu_page_sizes[pgsz_idx]); | ||
2832 | |||
2743 | va_node->vaddr_start = vaddr_start; | 2833 | va_node->vaddr_start = vaddr_start; |
2744 | va_node->size = (u64)args->page_size * (u64)args->pages; | 2834 | va_node->size = (u64)args->page_size * (u64)args->pages; |
2745 | va_node->pgsz_idx = pgsz_idx; | 2835 | va_node->pgsz_idx = pgsz_idx; |
@@ -2763,7 +2853,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2763 | true); | 2853 | true); |
2764 | if (!map_offset) { | 2854 | if (!map_offset) { |
2765 | mutex_unlock(&vm->update_gmmu_lock); | 2855 | mutex_unlock(&vm->update_gmmu_lock); |
2766 | gk20a_bfree(vma, vaddr_start); | 2856 | vma->free(vma, start_page_nr, args->pages, 1); |
2767 | kfree(va_node); | 2857 | kfree(va_node); |
2768 | goto clean_up; | 2858 | goto clean_up; |
2769 | } | 2859 | } |
@@ -2775,7 +2865,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2775 | mutex_unlock(&vm->update_gmmu_lock); | 2865 | mutex_unlock(&vm->update_gmmu_lock); |
2776 | 2866 | ||
2777 | args->o_a.offset = vaddr_start; | 2867 | args->o_a.offset = vaddr_start; |
2778 | err = 0; | ||
2779 | 2868 | ||
2780 | clean_up: | 2869 | clean_up: |
2781 | return err; | 2870 | return err; |
@@ -2786,6 +2875,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2786 | { | 2875 | { |
2787 | int err = -ENOMEM; | 2876 | int err = -ENOMEM; |
2788 | int pgsz_idx; | 2877 | int pgsz_idx; |
2878 | u32 start_page_nr; | ||
2789 | struct gk20a_allocator *vma; | 2879 | struct gk20a_allocator *vma; |
2790 | struct vm_gk20a *vm = as_share->vm; | 2880 | struct vm_gk20a *vm = as_share->vm; |
2791 | struct vm_reserved_va_node *va_node; | 2881 | struct vm_reserved_va_node *va_node; |
@@ -2798,8 +2888,14 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2798 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? | 2888 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? |
2799 | gmmu_page_size_big : gmmu_page_size_small; | 2889 | gmmu_page_size_big : gmmu_page_size_small; |
2800 | 2890 | ||
2891 | start_page_nr = (u32)(args->offset >> | ||
2892 | ilog2(vm->gmmu_page_sizes[pgsz_idx])); | ||
2893 | |||
2801 | vma = &vm->vma[pgsz_idx]; | 2894 | vma = &vm->vma[pgsz_idx]; |
2802 | gk20a_bfree(vma, args->offset); | 2895 | err = vma->free(vma, start_page_nr, args->pages, 1); |
2896 | |||
2897 | if (err) | ||
2898 | goto clean_up; | ||
2803 | 2899 | ||
2804 | mutex_lock(&vm->update_gmmu_lock); | 2900 | mutex_lock(&vm->update_gmmu_lock); |
2805 | va_node = addr_to_reservation(vm, args->offset); | 2901 | va_node = addr_to_reservation(vm, args->offset); |
@@ -2829,8 +2925,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2829 | kfree(va_node); | 2925 | kfree(va_node); |
2830 | } | 2926 | } |
2831 | mutex_unlock(&vm->update_gmmu_lock); | 2927 | mutex_unlock(&vm->update_gmmu_lock); |
2832 | err = 0; | ||
2833 | 2928 | ||
2929 | clean_up: | ||
2834 | return err; | 2930 | return err; |
2835 | } | 2931 | } |
2836 | 2932 | ||