author		Bharat Nihalani <bnihalani@nvidia.com>	2015-05-29 06:56:23 -0400
committer	Bharat Nihalani <bnihalani@nvidia.com>	2015-06-02 23:18:55 -0400
commit		1d8fdf56959240622073dd771dd9bfccf31b8f8e (patch)
tree		5c670e604825ddc25d6b6b0cce32cb3e7dc6871a /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent		38cee4d7effe5a2079a08b3c9a216b3197893959 (diff)
Revert "Revert "Revert "gpu: nvgpu: New allocator for VA space"""
This reverts commit ce1cf06b9a8eb6314ba0ca294e8cb430e1e141c0 since it
causes a GPU pbdma interrupt to be generated.

Bug 200106514

Change-Id: If3ed9a914c4e3e7f3f98c6609c6dbf57e1eb9aad
Signed-off-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-on: http://git-master/r/749291
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  202
1 file changed, 149 insertions(+), 53 deletions(-)
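
Note: for orientation before reading the hunks, this revert swaps the byte-addressed buddy-allocator calls (gk20a_balloc/gk20a_bfree/gk20a_balloc_fixed) back to the older function-pointer, page-index allocator interface. A minimal sketch of that interface follows; it is inferred from the call sites in this diff (vma->alloc(vma, &start_page_nr, num_pages, 1) and vma->free(vma, start_page_nr, num_pages, 1)), so the field and parameter names are assumptions, not the actual gk20a_allocator.h declaration.

/*
 * Sketch only -- not the actual nvgpu header.  Shape of the page-index
 * allocator interface restored by this revert, inferred from the call
 * sites in the hunks below; names are assumptions.
 */
typedef unsigned int u32;

struct gk20a_allocator {
	const char *name;
	/* reserve 'len' units; on success *addr receives the first index */
	int (*alloc)(struct gk20a_allocator *a, u32 *addr, u32 len, u32 align);
	/* release 'len' units starting at index 'addr' */
	int (*free)(struct gk20a_allocator *a, u32 addr, u32 len, u32 align);
};

The gk20a_balloc()/gk20a_bfree() pair being removed here, by contrast, took sizes and returned byte addresses directly.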
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a38db709..735c262a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,8 +132,10 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	if (priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
-		gk20a_bfree(priv->comptag_allocator,
-			    priv->comptags.real_offset);
+		priv->comptag_allocator->free(priv->comptag_allocator,
+					      priv->comptags.offset,
+					      priv->comptags.allocated_lines,
+					      1);
 	}
 
 	/* Free buffer states */
@@ -224,9 +226,10 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+	u32 offset = 0;
+	int err;
 	u32 ctaglines_to_allocate;
-	u32 ctagline_align = 1;
-	u32 offset;
+	u32 ctagline_align;
 	const u32 aggregate_cacheline_sz =
 		g->gr.cacheline_size * g->gr.slices_per_ltc *
 		g->ltc_count;
@@ -240,6 +243,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 	if (!user_mappable) {
 		ctaglines_to_allocate = lines;
+		ctagline_align = 1;
 	} else {
 		/* Unfortunately, we cannot use allocation alignment
 		 * here, since compbits per cacheline is not always a
@@ -271,26 +275,82 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 		if (ctaglines_to_allocate < lines)
 			return -EINVAL; /* integer overflow */
-		pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
 	}
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	offset = gk20a_balloc(allocator, ctaglines_to_allocate);
-	if (!offset)
-		return -ENOMEM;
+	err = allocator->alloc(allocator, &offset,
+			       ctaglines_to_allocate, 1);
+	if (!err) {
+		const u32 alignment_lines =
+			DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+			offset;
+
+		/* prune the preceding ctaglines that were allocated
+		   for alignment */
+		if (alignment_lines) {
+			/* free alignment lines */
+			int tmp =
+				allocator->free(allocator, offset,
+						alignment_lines,
+						1);
+			WARN_ON(tmp);
+
+			offset += alignment_lines;
+			ctaglines_to_allocate -= alignment_lines;
+		}
 
-	priv->comptags.lines = lines;
-	priv->comptags.real_offset = offset;
+		/* check if we can prune the trailing, too */
+		if (user_mappable)
+		{
+			u32 needed_cachelines =
+				DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+
+			u32 first_unneeded_cacheline =
+				DIV_ROUND_UP(round_up(needed_cachelines *
+						      aggregate_cacheline_sz,
+						      small_pgsz),
+					     aggregate_cacheline_sz);
+			u32 needed_ctaglines =
+				first_unneeded_cacheline *
+				g->gr.comptags_per_cacheline;
+
+			u64 win_size;
+
+			if (needed_ctaglines < ctaglines_to_allocate) {
+				/* free alignment lines */
+				int tmp =
+					allocator->free(
+						allocator,
+						offset + needed_ctaglines,
+						(ctaglines_to_allocate -
							needed_ctaglines),
+						1);
+				WARN_ON(tmp);
+
+				ctaglines_to_allocate = needed_ctaglines;
+			}
 
-	if (user_mappable)
-		offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
+			*ctag_map_win_ctagline = offset;
+			win_size =
+				DIV_ROUND_UP(lines,
+					     g->gr.comptags_per_cacheline) *
+				aggregate_cacheline_sz;
 
-	priv->comptags.offset = offset;
+			*ctag_map_win_size = round_up(win_size, small_pgsz);
+		}
 
-	return 0;
+		priv->comptags.offset = offset;
+		priv->comptags.lines = lines;
+		priv->comptags.allocated_lines = ctaglines_to_allocate;
+		priv->comptags.user_mappable = user_mappable;
+	}
+	return err;
 }
 
+
+
+
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
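
Note: the alignment pruning restored in gk20a_alloc_comptags() above is easier to follow with concrete numbers. The standalone sketch below reproduces just that arithmetic with made-up values (offset 37, 100 allocated ctaglines, alignment 16); it is an illustration, not driver code.

#include <stdio.h>

/* same rounding as the kernel's DIV_ROUND_UP() */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* hypothetical values: 100 ctaglines allocated starting at line 37,
	 * with a required alignment of 16 ctaglines */
	unsigned int offset = 37, allocated = 100, align = 16;

	/* lines wasted in front of the first aligned ctagline */
	unsigned int alignment_lines =
		DIV_ROUND_UP(offset, align) * align - offset;

	if (alignment_lines) {
		/* the driver frees these back to the allocator ... */
		offset += alignment_lines;	/* ... and starts here */
		allocated -= alignment_lines;
	}

	printf("aligned offset %u, %u lines kept\n", offset, allocated);
	return 0;
}

With these inputs the program prints "aligned offset 48, 89 lines kept", mirroring how the restored code frees the leading lines and advances the offset to the alignment boundary.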
@@ -841,12 +901,14 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
		      u64 size,
		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
 	struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+	int err;
 	u64 offset;
+	u32 start_page_nr = 0, num_pages;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
 	if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -862,19 +924,28 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
 	}
 
-	/* Be certain we round up to gmmu_page_size if needed */
+	/* be certain we round up to gmmu_page_size if needed */
+	/* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
 	size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
+
 	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
			vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-	offset = gk20a_balloc(vma, size);
-	if (!offset) {
+	/* The vma allocator represents page accounting. */
+	num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
+
+	err = vma->alloc(vma, &start_page_nr, num_pages, 1);
+
+	if (err) {
 		gk20a_err(dev_from_vm(vm),
			"%s oom: sz=0x%llx", vma->name, size);
 		return 0;
 	}
 
+	offset = (u64)start_page_nr <<
+		 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 	gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
+
 	return offset;
 }
 
@@ -883,12 +954,25 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	u32 page_shift = ilog2(page_size);
+	u32 start_page_nr, num_pages;
+	int err;
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
			vma->name, offset, size);
-	gk20a_bfree(vma, offset);
 
-	return 0;
+	start_page_nr = (u32)(offset >> page_shift);
+	num_pages = (u32)((size + page_size - 1) >> page_shift);
+
+	err = vma->free(vma, start_page_nr, num_pages, 1);
+	if (err) {
+		gk20a_err(dev_from_vm(vm),
+			"not found: offset=0x%llx, sz=0x%llx",
+			offset, size);
+	}
+
+	return err;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
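
Note: both restored paths above (gk20a_vm_alloc_va() and gk20a_vm_free_va()) translate between byte offsets and page numbers by shifting by ilog2(page size). The standalone sketch below shows that conversion with example values; ilog2() is open-coded here because the kernel helper is not available outside the kernel, and the 128KB big-page size and page number 42 are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* stand-in for the kernel's ilog2() for power-of-two sizes */
static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	uint32_t page_size = 128 * 1024;		/* 128KB big pages */
	unsigned int shift = ilog2_u32(page_size);	/* 17 */

	uint64_t size = 1024 * 1024;			/* 1MB request */
	uint32_t num_pages = (uint32_t)(size >> shift);	/* 8 pages */

	uint32_t start_page_nr = 42;	/* as a vma->alloc() call might return */
	uint64_t offset = (uint64_t)start_page_nr << shift;

	printf("%u pages, GPU VA 0x%llx\n", num_pages,
	       (unsigned long long)offset);
	return 0;
}

Shifts are used instead of 64-bit division; the restored code even carries a TBD comment about __aeabi_uldivmod to that effect.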
@@ -1085,7 +1169,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
 	if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
 		gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-			map_offset);
+			  map_offset);
 		return -EINVAL;
 	}
 
@@ -2529,6 +2613,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
		  char *name)
 {
 	int err, i;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
 	u64 small_vma_size, large_vma_size;
 	u32 pde_lo, pde_hi;
@@ -2589,31 +2674,34 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		large_vma_size = vm->va_limit - small_vma_size;
 	}
 
+	num_small_pages = (u32)(small_vma_size >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+	/* num_pages above is without regard to the low-side hole. */
+	low_hole_pages = (vm->va_start >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
 	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-			vm, alloc_name,
-			vm->va_start,
-			small_vma_size - vm->va_start,
-			SZ_4K,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_BALLOC_GVA_SPACE);
+	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+			alloc_name,
+			low_hole_pages,		 /*start*/
+			num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_ptes;
 
 	if (big_pages) {
+		u32 start = (u32)(small_vma_size >>
+			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+		num_large_pages = (u32)(large_vma_size >>
+			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-		/*
-		 * Big page VMA starts at the end of the small page VMA.
-		 */
-		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-				vm, alloc_name,
-				small_vma_size,
-				large_vma_size,
-				big_page_size,
-				GPU_BALLOC_MAX_ORDER,
-				GPU_BALLOC_GVA_SPACE);
+		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+				alloc_name,
+				start,			/* start */
+				num_large_pages);	/* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
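
Note: in the restored gk20a_init_vm() above, the two VMA allocators are sized in pages, and the small-page allocator starts after the low hole at the bottom of the address space. The sketch below runs that arithmetic with made-up sizes (a 16GB/48GB small/big split, 4KB/128KB page sizes, and a 1MB hole); the real gk20a layout differs.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned int small_shift = 12;	/* 4KB small pages */
	const unsigned int big_shift   = 17;	/* 128KB big pages */

	/* example layout: 16GB of small-page VA, big-page VA above it,
	 * and a 1MB unusable hole at the very bottom */
	uint64_t small_vma_size = 16ULL << 30;
	uint64_t large_vma_size = 48ULL << 30;
	uint64_t va_start       = 1ULL << 20;

	uint32_t num_small_pages = (uint32_t)(small_vma_size >> small_shift);
	uint32_t low_hole_pages  = (uint32_t)(va_start >> small_shift);
	uint32_t big_start       = (uint32_t)(small_vma_size >> big_shift);
	uint32_t num_large_pages = (uint32_t)(large_vma_size >> big_shift);

	/* the small allocator manages [low_hole_pages, num_small_pages),
	 * the big allocator manages [big_start, big_start + num_large_pages) */
	printf("small: start %u, length %u\n",
	       low_hole_pages, num_small_pages - low_hole_pages);
	printf("big:   start %u, length %u\n", big_start, num_large_pages);
	return 0;
}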
@@ -2694,9 +2782,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
			 struct nvgpu_as_alloc_space_args *args)
 
-{
-	int err = -ENOMEM;
+{ int err = -ENOMEM;
 	int pgsz_idx = gmmu_page_size_small;
+	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct gk20a *g = vm->mm->g;
@@ -2727,19 +2815,21 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vma = &vm->vma[pgsz_idx];
+	start_page_nr = 0;
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
-						 (u64)args->pages *
-						 (u64)args->page_size);
-	else
-		vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
+		start_page_nr = (u32)(args->o_a.offset >>
+				      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
-	if (!vaddr_start) {
+	vma = &vm->vma[pgsz_idx];
+	err = vma->alloc(vma, &start_page_nr, args->pages, 1);
+	if (err) {
 		kfree(va_node);
 		goto clean_up;
 	}
 
+	vaddr_start = (u64)start_page_nr <<
+		      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
+
 	va_node->vaddr_start = vaddr_start;
 	va_node->size = (u64)args->page_size * (u64)args->pages;
 	va_node->pgsz_idx = pgsz_idx;
@@ -2763,7 +2853,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
					 true);
 		if (!map_offset) {
 			mutex_unlock(&vm->update_gmmu_lock);
-			gk20a_bfree(vma, vaddr_start);
+			vma->free(vma, start_page_nr, args->pages, 1);
 			kfree(va_node);
 			goto clean_up;
 		}
@@ -2775,7 +2865,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	args->o_a.offset = vaddr_start;
-	err = 0;
 
 clean_up:
 	return err;
@@ -2786,6 +2875,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
 	int err = -ENOMEM;
 	int pgsz_idx;
+	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
@@ -2798,8 +2888,14 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
			gmmu_page_size_big : gmmu_page_size_small;
 
+	start_page_nr = (u32)(args->offset >>
+			      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+
 	vma = &vm->vma[pgsz_idx];
-	gk20a_bfree(vma, args->offset);
+	err = vma->free(vma, start_page_nr, args->pages, 1);
+
+	if (err)
+		goto clean_up;
 
 	mutex_lock(&vm->update_gmmu_lock);
 	va_node = addr_to_reservation(vm, args->offset);
@@ -2829,8 +2925,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
-	err = 0;
 
+clean_up:
 	return err;
 }
 