Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 76
1 file changed, 55 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 738df2af..7a02d68e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A memory management
  *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -323,7 +323,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 	if (err)
 		return err;
 
 	/*
 	 * offset needs to be at the start of a page/cacheline boundary;
 	 * prune the preceding ctaglines that were allocated for alignment.
 	 */
@@ -1290,12 +1290,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
 
-	if (clear_ctags && ctag_offset) {
-		/* init/clear the ctag buffer */
-		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-				    ctag_offset, ctag_offset + ctag_lines - 1);
-	}
-
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
 		map_offset = gk20a_vm_alloc_va(vm, size,
@@ -1651,17 +1645,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		bfr.kind_v = bfr.uc_kind_v;
 	} else {
 		gk20a_get_comptags(d, dmabuf, &comptags);
-		clear_ctags = true;
-
-		if (comptags.lines < comptags.allocated_lines) {
-			/* clear tail-padding comptags */
-			u32 ctagmin = comptags.offset + comptags.lines;
-			u32 ctagmax = comptags.offset +
-				comptags.allocated_lines - 1;
 
+		if (g->ops.ltc.cbc_ctrl)
 			g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-					    ctagmin, ctagmax);
-		}
+					    comptags.offset,
+					    comptags.offset +
+					    comptags.allocated_lines - 1);
+		else
+			clear_ctags = true;
 	}
 }
 
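The two hunks above rework comptag clearing: the map-time clear in gk20a_locked_gmmu_map is removed, and gk20a_vm_map now clears the entire allocated ctagline range up front whenever the ltc.cbc_ctrl hook is present, keeping clear_ctags only as a fallback for chips without that hook. A minimal userspace sketch of the resulting policy, assuming the field names from the hunk; cbc_clear() is a hypothetical stand-in for g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, ...):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct comptags { uint32_t offset, lines, allocated_lines; };

    /* Stand-in for g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, min, max). */
    static void cbc_clear(uint32_t min, uint32_t max)
    {
        printf("clearing ctaglines [%u..%u]\n", min, max);
    }

    /* Returns the value clear_ctags should take after the allocation. */
    static bool clear_on_alloc(const struct comptags *ct, bool have_cbc_ctrl)
    {
        if (have_cbc_ctrl) {
            /* Clear the whole allocation, alignment padding included. */
            cbc_clear(ct->offset, ct->offset + ct->allocated_lines - 1);
            return false;
        }
        return true; /* no hook: leave clearing to a later stage */
    }

    int main(void)
    {
        struct comptags ct = { .offset = 128, .lines = 10, .allocated_lines = 16 };
        printf("clear_ctags = %d\n", clear_on_alloc(&ct, true));
        return 0;
    }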
@@ -2815,6 +2806,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
 		kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
+	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
@@ -2904,6 +2896,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 	}
 
+	/*
+	 * Attempt to make a separate VM for fixed allocations.
+	 */
+	if (g->separate_fixed_allocs &&
+	    small_vma_start < small_vma_limit) {
+		if (g->separate_fixed_allocs >= small_vma_limit)
+			goto clean_up_pdes;
+
+		snprintf(alloc_name, sizeof(alloc_name),
+			 "gk20a_%s-fixed", name);
+
+		err = __gk20a_allocator_init(&vm->fixed,
+					     vm, alloc_name,
+					     small_vma_start,
+					     g->separate_fixed_allocs,
+					     SZ_4K,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_ptes;
+
+		/* Make sure to update the user vma size. */
+		small_vma_start = g->separate_fixed_allocs;
+	}
+
 	if (small_vma_start < small_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
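This block carves a dedicated allocator out of the low end of the user VA range. Judging by the bounds check against small_vma_limit and the final assignment, g->separate_fixed_allocs is treated as a split address: the fixed allocator covers [small_vma_start, separate_fixed_allocs) and the small-page VMA is moved up to begin at the split. A sketch of that arithmetic under the same assumption (the return convention is mine):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Split [*small_start, small_limit) at 'split'. Returns 1 when a fixed
     * range is carved out, 0 when the feature is off, -1 for a bad split.
     */
    static int carve_fixed_range(uint64_t *small_start, uint64_t small_limit,
                                 uint64_t split,
                                 uint64_t *fixed_start, uint64_t *fixed_limit)
    {
        if (!split || *small_start >= small_limit)
            return 0;
        if (split >= small_limit)
            return -1; /* mirrors the goto clean_up_pdes above */
        *fixed_start = *small_start;
        *fixed_limit = split;
        *small_start = split; /* "update the user vma size" */
        return 1;
    }

    int main(void)
    {
        uint64_t small = 0x100000, fs, fl;
        if (carve_fixed_range(&small, 0x40000000, 0x10000000, &fs, &fl) == 1)
            printf("fixed [%#jx, %#jx), small starts at %#jx\n",
                   (uintmax_t)fs, (uintmax_t)fl, (uintmax_t)small);
        return 0;
    }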
@@ -3066,14 +3083,17 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	}
 
 	vma = &vm->vma[pgsz_idx];
-	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
+	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
+		if (vm->fixed.init)
+			vma = &vm->fixed;
 		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
 						 (u64)args->pages *
 						 (u64)args->page_size);
-	else
+	} else {
 		vaddr_start = gk20a_balloc(vma,
 					   (u64)args->pages *
 					   (u64)args->page_size);
+	}
 
 	if (!vaddr_start) {
 		kfree(va_node);
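With the carve-out in place, fixed-offset space requests are routed to vm->fixed whenever it was initialized, while ordinary requests keep using the per-page-size VMA. The selection is a simple two-level preference; a sketch, where only the init flag is taken from the diff:

    struct allocator { int init; /* nonzero once the allocator is usable */ };

    struct vm {
        struct allocator vma[3]; /* small, big, kernel page sizes */
        struct allocator fixed;  /* optional fixed-offset carve-out */
    };

    static struct allocator *pick_vma(struct vm *vm, int pgsz_idx,
                                      int wants_fixed_offset)
    {
        if (wants_fixed_offset && vm->fixed.init)
            return &vm->fixed;
        return &vm->vma[pgsz_idx];
    }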
@@ -3140,7 +3160,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
-	vma = &vm->vma[pgsz_idx];
+	if (vm->fixed.init)
+		vma = &vm->fixed;
+	else
+		vma = &vm->vma[pgsz_idx];
 	gk20a_bfree(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
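Note the asymmetry with the allocation path: the free path does not look at the FIXED_OFFSET flag, so once vm->fixed.init is set every space free is handed to the fixed allocator. Presumably the debugfs knob is only enabled in configurations where all alloc-space reservations are fixed-offset; a non-fixed reservation freed here would otherwise go to the wrong allocator.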
@@ -3330,6 +3353,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm)
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	if (vm->vma[gmmu_page_size_small].init)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->fixed.init)
+		gk20a_allocator_destroy(&vm->fixed);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
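Teardown mirrors the other VMAs: the carve-out is destroyed only when its init flag shows it was actually created, so VMs brought up without separate_fixed_allocs are unaffected.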
@@ -3843,6 +3868,16 @@ clean_up:
 	return err;
 }
 
+void gk20a_mm_debugfs_init(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = platform_get_drvdata(pdev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct gk20a *g = gk20a_get_platform(pdev)->g;
+
+	debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root,
+			   &g->separate_fixed_allocs);
+}
+
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
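The new gk20a_mm_debugfs_init exposes g->separate_fixed_allocs as a writable hex value via debugfs_create_x64, so the split address can be set at runtime before an address space is created. A hypothetical userspace usage sketch; the debugfs mount point and per-GPU directory name are assumptions, not taken from the diff:

    #include <stdio.h>

    int main(void)
    {
        /* Path is illustrative; the real directory is platform->debugfs. */
        FILE *f = fopen("/sys/kernel/debug/gpu.0/separate_fixed_allocs", "w");

        if (!f) {
            perror("separate_fixed_allocs");
            return 1;
        }
        /* x64 debugfs attributes accept standard hex input. */
        fprintf(f, "0x%llx\n", 0x10000000ULL);
        return fclose(f) ? 1 : 0;
    }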
@@ -3863,4 +3898,3 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.init_pdb = gk20a_mm_init_pdb;
 	gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
 }
-