path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author     Bharat Nihalani <bnihalani@nvidia.com>    2015-06-04 08:08:59 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2015-06-04 13:41:00 -0400
commit     b8aa486109a43a8c92159b0845a4adc9f6a84654 (patch)
tree       9086e63713df6a63d4ad152503c49e238d23c7d3 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     56d7896731c91595db3205777f308fbcaeac7340 (diff)
Revert "Revert "Revert "Revert "gpu: nvgpu: New allocator for VA space""""
This reverts commit 2e5803d0f2b7d7a1577a40f45ab9f3b22ef2df80 since the issue
seen with bug 200106514 is fixed with change http://git-master/r/#/c/752080/.

Bug 200112195

Change-Id: I588151c2a7ea74bd89dc3fd48bb81ff2c49f5a0a
Signed-off-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-on: http://git-master/r/752503
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  202
1 file changed, 53 insertions, 149 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 735c262a..a38db709 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv)
 
         if (priv->comptags.lines) {
                 BUG_ON(!priv->comptag_allocator);
-                priv->comptag_allocator->free(priv->comptag_allocator,
-                                priv->comptags.offset,
-                                priv->comptags.allocated_lines,
-                                1);
+                gk20a_bfree(priv->comptag_allocator,
+                            priv->comptags.real_offset);
         }
 
         /* Free buffer states */
@@ -226,10 +224,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
                                 u32 *ctag_map_win_ctagline)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-        u32 offset = 0;
-        int err;
         u32 ctaglines_to_allocate;
-        u32 ctagline_align;
+        u32 ctagline_align = 1;
+        u32 offset;
         const u32 aggregate_cacheline_sz =
                 g->gr.cacheline_size * g->gr.slices_per_ltc *
                 g->ltc_count;
@@ -243,7 +240,6 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
         if (!user_mappable) {
                 ctaglines_to_allocate = lines;
-                ctagline_align = 1;
         } else {
                 /* Unfortunately, we cannot use allocation alignment
                  * here, since compbits per cacheline is not always a
@@ -275,82 +271,26 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
                 if (ctaglines_to_allocate < lines)
                         return -EINVAL; /* integer overflow */
+                pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
         }
 
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        err = allocator->alloc(allocator, &offset,
-                        ctaglines_to_allocate, 1);
-        if (!err) {
-                const u32 alignment_lines =
-                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
-                        offset;
-
-                /* prune the preceding ctaglines that were allocated
-                   for alignment */
-                if (alignment_lines) {
-                        /* free alignment lines */
-                        int tmp=
-                                allocator->free(allocator, offset,
-                                                alignment_lines,
-                                                1);
-                        WARN_ON(tmp);
-
-                        offset += alignment_lines;
-                        ctaglines_to_allocate -= alignment_lines;
-                }
+        offset = gk20a_balloc(allocator, ctaglines_to_allocate);
+        if (!offset)
+                return -ENOMEM;
 
-                /* check if we can prune the trailing, too */
-                if (user_mappable)
-                {
-                        u32 needed_cachelines =
-                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
-
-                        u32 first_unneeded_cacheline =
-                                DIV_ROUND_UP(round_up(needed_cachelines *
-                                                      aggregate_cacheline_sz,
-                                                      small_pgsz),
-                                             aggregate_cacheline_sz);
-                        u32 needed_ctaglines =
-                                first_unneeded_cacheline *
-                                g->gr.comptags_per_cacheline;
-
-                        u64 win_size;
-
-                        if (needed_ctaglines < ctaglines_to_allocate) {
-                                /* free alignment lines */
-                                int tmp=
-                                        allocator->free(
-                                                allocator,
-                                                offset + needed_ctaglines,
-                                                (ctaglines_to_allocate -
-                                                        needed_ctaglines),
-                                                1);
-                                WARN_ON(tmp);
-
-                                ctaglines_to_allocate = needed_ctaglines;
-                        }
+        priv->comptags.lines = lines;
+        priv->comptags.real_offset = offset;
 
-                        *ctag_map_win_ctagline = offset;
-                        win_size =
-                                DIV_ROUND_UP(lines,
-                                             g->gr.comptags_per_cacheline) *
-                                aggregate_cacheline_sz;
+        if (user_mappable)
+                offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
 
-                        *ctag_map_win_size = round_up(win_size, small_pgsz);
-                }
+        priv->comptags.offset = offset;
 
-                priv->comptags.offset = offset;
-                priv->comptags.lines = lines;
-                priv->comptags.allocated_lines = ctaglines_to_allocate;
-                priv->comptags.user_mappable = user_mappable;
-        }
-        return err;
+        return 0;
 }
 
-
-
-
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
         gk20a_dbg_fn("");
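
For readers unfamiliar with the alignment step that survives into the new code, DIV_ROUND_UP(offset, ctagline_align) * ctagline_align simply rounds the allocated ctagline offset up to the next alignment boundary while real_offset keeps the raw value for the later free. A standalone sketch, with invented values; only the rounding expression itself comes from the patch:

#include <stdio.h>
#include <stdint.h>

/* usual kernel-style definition of DIV_ROUND_UP */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        uint32_t offset = 13;         /* invented: raw offset returned by the allocator */
        uint32_t ctagline_align = 8;  /* invented: required ctagline alignment */

        /* real_offset is what gets freed later; the published offset is aligned up */
        uint32_t aligned = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;

        printf("real_offset=%u offset=%u\n", offset, aligned);  /* 13 -> 16 */
        return 0;
}
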
@@ -901,14 +841,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                       u64 size,
                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
-        int err;
         u64 offset;
-        u32 start_page_nr = 0, num_pages;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
         if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -924,28 +862,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
         }
 
-        /* be certain we round up to gmmu_page_size if needed */
-        /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
+        /* Be certain we round up to gmmu_page_size if needed */
         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-
         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
                         vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-        /* The vma allocator represents page accounting. */
-        num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
-
-        err = vma->alloc(vma, &start_page_nr, num_pages, 1);
-
-        if (err) {
+        offset = gk20a_balloc(vma, size);
+        if (!offset) {
                 gk20a_err(dev_from_vm(vm),
                           "%s oom: sz=0x%llx", vma->name, size);
                 return 0;
         }
 
-        offset = (u64)start_page_nr <<
-                 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-
         return offset;
 }
 
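
The round-up kept by this hunk, size = (size + (pgsz - 1)) & ~(pgsz - 1), works for any power-of-two page size and avoids the 64-bit division that the removed TBD comment warned about (__aeabi_uldivmod on 32-bit ARM). A standalone sketch of the idiom with invented sizes:

#include <stdio.h>
#include <stdint.h>

/* Round size up to a power-of-two boundary without dividing */
static uint64_t round_up_pow2(uint64_t size, uint64_t pgsz)
{
        return (size + (pgsz - 1)) & ~(pgsz - 1);
}

int main(void)
{
        /* 5000 bytes on a 4 KiB page rounds to 8192 */
        printf("%llu\n", (unsigned long long)round_up_pow2(5000, 4096));
        /* an already-aligned size is unchanged: 131072 */
        printf("%llu\n", (unsigned long long)round_up_pow2(128 << 10, 64 << 10));
        return 0;
}
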
@@ -954,25 +883,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                       enum gmmu_pgsz_gk20a pgsz_idx)
 {
         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
-        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-        u32 page_shift = ilog2(page_size);
-        u32 start_page_nr, num_pages;
-        int err;
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                         vma->name, offset, size);
+        gk20a_bfree(vma, offset);
 
-        start_page_nr = (u32)(offset >> page_shift);
-        num_pages = (u32)((size + page_size - 1) >> page_shift);
-
-        err = vma->free(vma, start_page_nr, num_pages, 1);
-        if (err) {
-                gk20a_err(dev_from_vm(vm),
-                        "not found: offset=0x%llx, sz=0x%llx",
-                        offset, size);
-        }
-
-        return err;
+        return 0;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
@@ -1169,7 +1085,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
         if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-                           map_offset);
+                          map_offset);
                 return -EINVAL;
         }
 
@@ -2613,7 +2529,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                   char *name)
 {
         int err, i;
-        u32 num_small_pages, num_large_pages, low_hole_pages;
         char alloc_name[32];
         u64 small_vma_size, large_vma_size;
         u32 pde_lo, pde_hi;
@@ -2674,34 +2589,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 large_vma_size = vm->va_limit - small_vma_size;
         }
 
-        num_small_pages = (u32)(small_vma_size >>
-                    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
-        /* num_pages above is without regard to the low-side hole. */
-        low_hole_pages = (vm->va_start >>
-                          ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
-
         snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
                  vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-        err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-                        alloc_name,
-                        low_hole_pages,             /*start*/
-                        num_small_pages - low_hole_pages);/* length*/
+        err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+                                     vm, alloc_name,
+                                     vm->va_start,
+                                     small_vma_size - vm->va_start,
+                                     SZ_4K,
+                                     GPU_BALLOC_MAX_ORDER,
+                                     GPU_BALLOC_GVA_SPACE);
         if (err)
                 goto clean_up_ptes;
 
         if (big_pages) {
-                u32 start = (u32)(small_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-                num_large_pages = (u32)(large_vma_size >>
-                            ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
-
                 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
                          name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-                err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-                              alloc_name,
-                              start,                  /* start */
-                              num_large_pages);       /* length */
+                /*
+                 * Big page VMA starts at the end of the small page VMA.
+                 */
+                err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+                                             vm, alloc_name,
+                                             small_vma_size,
+                                             large_vma_size,
+                                             big_page_size,
+                                             GPU_BALLOC_MAX_ORDER,
+                                             GPU_BALLOC_GVA_SPACE);
                 if (err)
                         goto clean_up_small_allocator;
         }
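
As the new base/length arguments above show, the VA range is simply split in two: the small-page allocator is handed [va_start, small_vma_size) and the big-page allocator [small_vma_size, va_limit), with large_vma_size = va_limit - small_vma_size. A standalone sketch of that arithmetic; the numbers are invented, only the variable names come from the patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t va_start       = 0x100000;       /* invented: end of the low hole */
        uint64_t va_limit       = 1ULL << 37;     /* invented: 128 GiB of GPU VA */
        uint64_t small_vma_size = 1ULL << 32;     /* invented: split point */
        uint64_t large_vma_size = va_limit - small_vma_size;

        printf("small pages: base=0x%llx length=0x%llx\n",
               (unsigned long long)va_start,
               (unsigned long long)(small_vma_size - va_start));
        printf("big pages:   base=0x%llx length=0x%llx\n",
               (unsigned long long)small_vma_size,
               (unsigned long long)large_vma_size);
        return 0;
}
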
@@ -2782,9 +2694,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                          struct nvgpu_as_alloc_space_args *args)
 
-{ int err = -ENOMEM;
+{
+        int err = -ENOMEM;
         int pgsz_idx = gmmu_page_size_small;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct gk20a *g = vm->mm->g;
@@ -2815,21 +2727,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        start_page_nr = 0;
+        vma = &vm->vma[pgsz_idx];
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-                start_page_nr = (u32)(args->o_a.offset >>
-                                      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+                vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
+                                                 (u64)args->pages *
+                                                 (u64)args->page_size);
+        else
+                vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
 
-        vma = &vm->vma[pgsz_idx];
-        err = vma->alloc(vma, &start_page_nr, args->pages, 1);
-        if (err) {
+        if (!vaddr_start) {
                 kfree(va_node);
                 goto clean_up;
         }
 
-        vaddr_start = (u64)start_page_nr <<
-                      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
-
         va_node->vaddr_start = vaddr_start;
         va_node->size = (u64)args->page_size * (u64)args->pages;
         va_node->pgsz_idx = pgsz_idx;
@@ -2853,7 +2763,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                                                  true);
                 if (!map_offset) {
                         mutex_unlock(&vm->update_gmmu_lock);
-                        vma->free(vma, start_page_nr, args->pages, 1);
+                        gk20a_bfree(vma, vaddr_start);
                         kfree(va_node);
                         goto clean_up;
                 }
@@ -2865,6 +2775,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
         mutex_unlock(&vm->update_gmmu_lock);
 
         args->o_a.offset = vaddr_start;
+        err = 0;
 
 clean_up:
         return err;
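
One detail worth noting in the fixed-offset branch above: args->pages and args->page_size are both cast to u64 before the multiplication so the byte count cannot wrap in 32-bit arithmetic. A standalone sketch of why that matters, with invented values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t pages = 1 << 20;       /* invented: 1Mi pages */
        uint32_t page_size = 64 << 10;  /* invented: 64 KiB pages */

        /* 32-bit multiply wraps: 2^36 mod 2^32 == 0 */
        uint64_t wrapped = (uint64_t)(pages * page_size);
        /* widening first keeps the full 64 GiB byte count */
        uint64_t correct = (uint64_t)pages * (uint64_t)page_size;

        printf("wrapped=%llu correct=%llu\n",
               (unsigned long long)wrapped, (unsigned long long)correct);
        return 0;
}
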
@@ -2875,7 +2786,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
         int err = -ENOMEM;
         int pgsz_idx;
-        u32 start_page_nr;
         struct gk20a_allocator *vma;
         struct vm_gk20a *vm = as_share->vm;
         struct vm_reserved_va_node *va_node;
@@ -2888,14 +2798,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
         pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
                         gmmu_page_size_big : gmmu_page_size_small;
 
-        start_page_nr = (u32)(args->offset >>
-                              ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
         vma = &vm->vma[pgsz_idx];
-        err = vma->free(vma, start_page_nr, args->pages, 1);
-
-        if (err)
-                goto clean_up;
+        gk20a_bfree(vma, args->offset);
 
         mutex_lock(&vm->update_gmmu_lock);
         va_node = addr_to_reservation(vm, args->offset);
@@ -2925,8 +2829,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                 kfree(va_node);
         }
         mutex_unlock(&vm->update_gmmu_lock);
+        err = 0;
 
-clean_up:
         return err;
 }
 