path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author     Bharat Nihalani <bnihalani@nvidia.com>    2015-06-04 08:08:59 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2015-06-04 13:41:00 -0400
commit     b8aa486109a43a8c92159b0845a4adc9f6a84654 (patch)
tree       9086e63713df6a63d4ad152503c49e238d23c7d3 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     56d7896731c91595db3205777f308fbcaeac7340 (diff)
Revert "Revert "Revert "Revert "gpu: nvgpu: New allocator for VA space""""
This reverts commit 2e5803d0f2b7d7a1577a40f45ab9f3b22ef2df80 since the issue
seen with bug 200106514 is fixed with change http://git-master/r/#/c/752080/.

Bug 200112195

Change-Id: I588151c2a7ea74bd89dc3fd48bb81ff2c49f5a0a
Signed-off-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-on: http://git-master/r/752503
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  202
1 file changed, 53 insertions, 149 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 735c262a..a38db709 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv)
 
         if (priv->comptags.lines) {
                 BUG_ON(!priv->comptag_allocator);
-                priv->comptag_allocator->free(priv->comptag_allocator,
-                                priv->comptags.offset,
-                                priv->comptags.allocated_lines,
-                                1);
+                gk20a_bfree(priv->comptag_allocator,
+                            priv->comptags.real_offset);
         }
 
         /* Free buffer states */
@@ -226,10 +224,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
                                 u32 *ctag_map_win_ctagline)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-        u32 offset = 0;
-        int err;
         u32 ctaglines_to_allocate;
-        u32 ctagline_align;
+        u32 ctagline_align = 1;
+        u32 offset;
         const u32 aggregate_cacheline_sz =
                 g->gr.cacheline_size * g->gr.slices_per_ltc *
                 g->ltc_count;
@@ -243,7 +240,6 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
         if (!user_mappable) {
                 ctaglines_to_allocate = lines;
-                ctagline_align = 1;
         } else {
                 /* Unfortunately, we cannot use allocation alignment
                  * here, since compbits per cacheline is not always a
@@ -275,82 +271,26 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
                 if (ctaglines_to_allocate < lines)
                         return -EINVAL; /* integer overflow */
+                pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
         }
 
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        err = allocator->alloc(allocator, &offset,
-                        ctaglines_to_allocate, 1);
-        if (!err) {
-                const u32 alignment_lines =
-                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
-                        offset;
-
-                /* prune the preceding ctaglines that were allocated
-                   for alignment */
-                if (alignment_lines) {
-                        /* free alignment lines */
-                        int tmp=
-                                allocator->free(allocator, offset,
-                                                alignment_lines,
-                                                1);
-                        WARN_ON(tmp);
-
-                        offset += alignment_lines;
-                        ctaglines_to_allocate -= alignment_lines;
-                }
+        offset = gk20a_balloc(allocator, ctaglines_to_allocate);
+        if (!offset)
+                return -ENOMEM;
 
-                /* check if we can prune the trailing, too */
-                if (user_mappable)
-                {
-                        u32 needed_cachelines =
-                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-
-                        u32 first_unneeded_cacheline =
-                                DIV_ROUND_UP(round_up(needed_cachelines *
-                                                      aggregate_cacheline_sz,
-                                                      small_pgsz),
-                                             aggregate_cacheline_sz);
-                        u32 needed_ctaglines =
-                                first_unneeded_cacheline *
-                                g->gr.comptags_per_cacheline;
-
-                        u64 win_size;
-
-                        if (needed_ctaglines < ctaglines_to_allocate) {
-                                /* free alignment lines */
-                                int tmp=
-                                        allocator->free(
-                                                allocator,
-                                                offset + needed_ctaglines,
-                                                (ctaglines_to_allocate -
-                                                        needed_ctaglines),
-                                                1);
-                                WARN_ON(tmp);
-
-                                ctaglines_to_allocate = needed_ctaglines;
-                        }
+        priv->comptags.lines = lines;
+        priv->comptags.real_offset = offset;
 
-                        *ctag_map_win_ctagline = offset;
-                        win_size =
-                                DIV_ROUND_UP(lines,
-                                             g->gr.comptags_per_cacheline) *
-                                aggregate_cacheline_sz;
+        if (user_mappable)
+                offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
 
-                        *ctag_map_win_size = round_up(win_size, small_pgsz);
-                }
+        priv->comptags.offset = offset;
 
-                priv->comptags.offset = offset;
-                priv->comptags.lines = lines;
-                priv->comptags.allocated_lines = ctaglines_to_allocate;
-                priv->comptags.user_mappable = user_mappable;
-        }
-        return err;
+        return 0;
 }
 
-
-
-
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
         gk20a_dbg_fn("");
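
For readers unfamiliar with the alignment step that survives into the new code, DIV_ROUND_UP(offset, ctagline_align) * ctagline_align simply rounds the allocated ctagline offset up to the next alignment boundary while real_offset keeps the raw value for the later free. A standalone sketch, with invented values; only the rounding expression itself comes from the patch:

#include <stdio.h>
#include <stdint.h>

/* usual kernel-style definition of DIV_ROUND_UP */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        uint32_t offset = 13;         /* invented: raw offset returned by the allocator */
        uint32_t ctagline_align = 8;  /* invented: required ctagline alignment */

        /* real_offset is what gets freed later; the published offset is aligned up */
        uint32_t aligned = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;

        printf("real_offset=%u offset=%u\n", offset, aligned);  /* 13 -> 16 */
        return 0;
}
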
@@ -901,14 +841,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                       u64 size,
                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
-        int err;
         u64 offset;
-        u32 start_page_nr = 0, num_pages;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
         if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -924,28 +862,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
         }
 
-        /* be certain we round up to gmmu_page_size if needed */
-        /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
+        /* Be certain we round up to gmmu_page_size if needed */
         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-
         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
                         vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-        /* The vma allocator represents page accounting. */
-        num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
-
-        err = vma->alloc(vma, &start_page_nr, num_pages, 1);
-
-        if (err) {
+        offset = gk20a_balloc(vma, size);
+        if (!offset) {
                 gk20a_err(dev_from_vm(vm),
                           "%s oom: sz=0x%llx", vma->name, size);
                 return 0;
         }
 
-        offset = (u64)start_page_nr <<
-                 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-
         return offset;
 }
 
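
The round-up kept by this hunk, size = (size + (pgsz - 1)) & ~(pgsz - 1), works for any power-of-two page size and avoids the 64-bit division that the removed TBD comment warned about (__aeabi_uldivmod on 32-bit ARM). A standalone sketch of the idiom with invented sizes:

#include <stdio.h>
#include <stdint.h>

/* Round size up to a power-of-two boundary without dividing */
static uint64_t round_up_pow2(uint64_t size, uint64_t pgsz)
{
        return (size + (pgsz - 1)) & ~(pgsz - 1);
}

int main(void)
{
        /* 5000 bytes on a 4 KiB page rounds to 8192 */
        printf("%llu\n", (unsigned long long)round_up_pow2(5000, 4096));
        /* an already-aligned size is unchanged: 131072 */
        printf("%llu\n", (unsigned long long)round_up_pow2(128 << 10, 64 << 10));
        return 0;
}
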
@@ -954,25 +883,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                       enum gmmu_pgsz_gk20a pgsz_idx)
 {
         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
-        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-        u32 page_shift = ilog2(page_size);
-        u32 start_page_nr, num_pages;
-        int err;
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                         vma->name, offset, size);
+        gk20a_bfree(vma, offset);
 
-        start_page_nr = (u32)(offset >> page_shift);
-        num_pages = (u32)((size + page_size - 1) >> page_shift);
-
-        err = vma->free(vma, start_page_nr, num_pages, 1);
-        if (err) {
-                gk20a_err(dev_from_vm(vm),
-                        "not found: offset=0x%llx, sz=0x%llx",
-                        offset, size);
-        }
-
-        return err;
+        return 0;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
@@ -1169,7 +1085,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
         if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-                           map_offset);
+                          map_offset);
                 return -EINVAL;
         }
 
@@ -2613,7 +2529,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                   char *name)
 {
         int err, i;
-        u32 num_small_pages, num_large_pages, low_hole_pages;
         char alloc_name[32];
         u64 small_vma_size, large_vma_size;
         u32 pde_lo, pde_hi;
@@ -2674,34 +2589,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 large_vma_size = vm->va_limit - small_vma_size;
         }
 
-        num_small_pages = (u32)(small_vma_size >>
-                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
-        /* num_pages above is without regard to the low-side hole. */
-        low_hole_pages = (vm->va_start >>
-                          ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
         snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
                  vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-        err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-                        alloc_name,
-                        low_hole_pages,             /*start*/
-                        num_small_pages - low_hole_pages);/* length*/
+        err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+                                     vm, alloc_name,
+                                     vm->va_start,
+                                     small_vma_size - vm->va_start,
+                                     SZ_4K,
+                                     GPU_BALLOC_MAX_ORDER,
+                                     GPU_BALLOC_GVA_SPACE);
         if (err)
                 goto clean_up_ptes;
 
         if (big_pages) {
-                u32 start = (u32)(small_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-                num_large_pages = (u32)(large_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-
                 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
                          name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-                err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-                              alloc_name,
-                              start,                  /* start */
-                              num_large_pages);       /* length */
+                /*
+                 * Big page VMA starts at the end of the small page VMA.
+                 */
+                err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+                                             vm, alloc_name,
+                                             small_vma_size,
+                                             large_vma_size,
+                                             big_page_size,
+                                             GPU_BALLOC_MAX_ORDER,
+                                             GPU_BALLOC_GVA_SPACE);
                 if (err)
                         goto clean_up_small_allocator;
         }
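
As the new base/length arguments above show, the VA range is simply split in two: the small-page allocator is handed [va_start, small_vma_size) and the big-page allocator [small_vma_size, va_limit), with large_vma_size = va_limit - small_vma_size. A standalone sketch of that arithmetic; the numbers are invented, only the variable names come from the patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t va_start       = 0x100000;       /* invented: end of the low hole */
        uint64_t va_limit       = 1ULL << 37;     /* invented: 128 GiB of GPU VA */
        uint64_t small_vma_size = 1ULL << 32;     /* invented: split point */
        uint64_t large_vma_size = va_limit - small_vma_size;

        printf("small pages: base=0x%llx length=0x%llx\n",
               (unsigned long long)va_start,
               (unsigned long long)(small_vma_size - va_start));
        printf("big pages:   base=0x%llx length=0x%llx\n",
               (unsigned long long)small_vma_size,
               (unsigned long long)large_vma_size);
        return 0;
}
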
@@ -2782,9 +2694,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                          struct nvgpu_as_alloc_space_args *args)
 
-{ int err = -ENOMEM;
+{
+        int err = -ENOMEM;
         int pgsz_idx = gmmu_page_size_small;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct gk20a *g = vm->mm->g;
@@ -2815,21 +2727,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        start_page_nr = 0;
+        vma = &vm->vma[pgsz_idx];
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-                start_page_nr = (u32)(args->o_a.offset >>
-                                      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+                vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
+                                                 (u64)args->pages *
+                                                 (u64)args->page_size);
+        else
+                vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
 
-        vma = &vm->vma[pgsz_idx];
-        err = vma->alloc(vma, &start_page_nr, args->pages, 1);
-        if (err) {
+        if (!vaddr_start) {
                 kfree(va_node);
                 goto clean_up;
         }
 
-        vaddr_start = (u64)start_page_nr <<
-                      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
-
         va_node->vaddr_start = vaddr_start;
         va_node->size = (u64)args->page_size * (u64)args->pages;
         va_node->pgsz_idx = pgsz_idx;
@@ -2853,7 +2763,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                                                  true);
                 if (!map_offset) {
                         mutex_unlock(&vm->update_gmmu_lock);
-                        vma->free(vma, start_page_nr, args->pages, 1);
+                        gk20a_bfree(vma, vaddr_start);
                         kfree(va_node);
                         goto clean_up;
                 }
@@ -2865,6 +2775,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
         mutex_unlock(&vm->update_gmmu_lock);
 
         args->o_a.offset = vaddr_start;
+        err = 0;
 
 clean_up:
         return err;
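
One detail worth noting in the fixed-offset branch above: args->pages and args->page_size are both cast to u64 before the multiplication so the byte count cannot wrap in 32-bit arithmetic. A standalone sketch of why that matters, with invented values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t pages = 1 << 20;       /* invented: 1Mi pages */
        uint32_t page_size = 64 << 10;  /* invented: 64 KiB pages */

        /* 32-bit multiply wraps: 2^36 mod 2^32 == 0 */
        uint64_t wrapped = (uint64_t)(pages * page_size);
        /* widening first keeps the full 64 GiB byte count */
        uint64_t correct = (uint64_t)pages * (uint64_t)page_size;

        printf("wrapped=%llu correct=%llu\n",
               (unsigned long long)wrapped, (unsigned long long)correct);
        return 0;
}
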
@@ -2875,7 +2786,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
         int err = -ENOMEM;
         int pgsz_idx;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct vm_reserved_va_node *va_node;
@@ -2888,14 +2798,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
         pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
                         gmmu_page_size_big : gmmu_page_size_small;
 
-        start_page_nr = (u32)(args->offset >>
-                              ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
         vma = &vm->vma[pgsz_idx];
-        err = vma->free(vma, start_page_nr, args->pages, 1);
-
-        if (err)
-                goto clean_up;
+        gk20a_bfree(vma, args->offset);
 
         mutex_lock(&vm->update_gmmu_lock);
         va_node = addr_to_reservation(vm, args->offset);
@@ -2925,8 +2829,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                 kfree(va_node);
         }
         mutex_unlock(&vm->update_gmmu_lock);
+        err = 0;
 
-clean_up:
         return err;
 }
 