author     Alex Waterman <alexw@nvidia.com>                     2016-10-31 16:17:44 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-01-31 19:23:07 -0500
commit     95a3eb454ca2f1b1b1a5677fe024eacae79bc0ed (patch)
tree       6b3daeb6e2f8ac5828faf1ddc448f61de8954b70 /drivers
parent     b9b94c073ce551935be1c00cb8e756ad5ce5c631 (diff)
gpu: nvgpu: Conditional address space unification
Allow platforms to choose whether or not to have unified GPU VA spaces.
This is useful for the dGPU, where a unified address space poses no
problems. On iGPUs, testing issues are getting in the way of enabling
this feature.

Bug 1396644
Bug 1729947

Change-Id: I65985f1f9a818f4b06219715cc09619911e4824b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265303
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
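For context, a minimal sketch of how a platform definition might opt in to the unified layout via the new flag this change adds. The platform variable name below is hypothetical and not part of this diff:

/* Hypothetical platform definition -- illustrative only, not from this change. */
static struct gk20a_platform example_dgpu_platform = {
	/* ... other platform fields ... */

	/* Opt in to a single user VA range for both small and large pages. */
	.unify_address_spaces = true,
};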
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c        | 223
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h        |  45
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h  |   3
3 files changed, 167 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b04a7e87..e589e312 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1788,12 +1788,6 @@ struct buffer_attrs {
 	bool ctag_user_mappable;
 };
 
-static void gmmu_select_page_size(struct vm_gk20a *vm,
-				  struct buffer_attrs *bfr)
-{
-	bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
-}
-
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 					     u32 flags,
 					     struct buffer_attrs *bfr,
@@ -2411,7 +2405,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	struct device *d = dev_from_vm(vm);
 	struct mapped_buffer_node *mapped_buffer = NULL;
 	bool inserted = false, va_allocated = false;
-	u32 gmmu_page_size = 0;
 	u64 map_offset = 0;
 	int err = 0;
 	struct buffer_attrs bfr = {NULL};
@@ -2482,42 +2475,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			min_t(u64, bfr.size, bfr.align));
 	mapping_size = mapping_size ? mapping_size : bfr.size;
 
-	if (vm->big_pages)
-		gmmu_select_page_size(vm, &bfr);
-	else
-		bfr.pgsz_idx = gmmu_page_size_small;
-
-	/* If FIX_OFFSET is set, pgsz is determined at address allocation
-	 * time. The alignment at address alloc time must be the same as
-	 * the alignment determined by gmmu_select_page_size().
-	 */
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
-		if (pgsz_idx > bfr.pgsz_idx) {
-			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
-				  offset_align, bfr.pgsz_idx, pgsz_idx);
-			err = -EINVAL;
-			goto clean_up;
-		}
-		bfr.pgsz_idx = min(bfr.pgsz_idx, pgsz_idx);
-	}
-
-	/* validate/adjust bfr attributes */
-	if (unlikely(bfr.pgsz_idx == -1)) {
-		gk20a_err(d, "unsupported page size detected");
-		goto clean_up;
-	}
-
-	if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
-		     bfr.pgsz_idx > gmmu_page_size_big)) {
-		BUG_ON(1);
-		err = -EINVAL;
-		goto clean_up;
-	}
-	gmmu_page_size = vm->gmmu_page_sizes[bfr.pgsz_idx];
-
 	/* Check if we should use a fixed offset for mapping this buffer */
-
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
 		err = validate_fixed_buffer(vm, &bfr,
 					    offset_align, mapping_size,
@@ -4289,7 +4247,7 @@ int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
 
 /*
  * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages but drop a warning.
+ * mapping. If no reserved area can be found use small pages.
  */
 enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
 					      u64 base, u64 size)
@@ -4303,6 +4261,60 @@ enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
 	return node->pgsz_idx;
 }
 
+/*
+ * This is for when the address space does not support unified address spaces.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
+					       u64 base, u64 size)
+{
+	if (!base) {
+		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+			return gmmu_page_size_big;
+		return gmmu_page_size_small;
+	} else {
+		if (base < __nv_gmmu_va_small_page_limit())
+			return gmmu_page_size_small;
+		else
+			return gmmu_page_size_big;
+	}
+}
+
+/*
+ * This determines the PTE size for a given alloc. Used by both the GVA space
+ * allocator and the mm core code so that agreement can be reached on how to
+ * map allocations.
+ *
+ * The page size of a buffer is this:
+ *
+ *   o  If the VM doesn't support large pages then obviously small pages
+ *      must be used.
+ *   o  If the base address is non-zero (fixed address map):
+ *      - Attempt to find a reserved memory area and use the page size
+ *        based on that.
+ *      - If no reserved page size is available, default to small pages.
+ *   o  If the base is zero:
+ *      - If the size is larger than or equal to the big page size, use big
+ *        pages.
+ *      - Otherwise use small pages.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	struct gk20a_platform *p = gk20a_get_platform(vm->mm->g->dev);
+
+	if (!vm->big_pages)
+		return gmmu_page_size_small;
+
+	if (!p->unify_address_spaces)
+		return __get_pte_size_split_addr(vm, base, size);
+
+	if (base)
+		return __get_pte_size_fixed_map(vm, base, size);
+
+	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+		return gmmu_page_size_big;
+	return gmmu_page_size_small;
+}
+
 static int init_vm_page_tables(struct vm_gk20a *vm)
 {
 	u32 pde_lo, pde_hi;
@@ -4376,15 +4388,20 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
 	int err;
 	char alloc_name[32];
+	u64 kernel_vma_flags;
 	u64 user_vma_start, user_vma_limit;
+	u64 user_lp_vma_start, user_lp_vma_limit;
 	u64 kernel_vma_start, kernel_vma_limit;
 	struct gk20a *g = mm->g;
+	struct gk20a_platform *p = gk20a_get_platform(g->dev);
 
 	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
 
-	gk20a_dbg_info("Init space for %s: va_limit=0x%llx",
-		       name, vm->va_limit);
+	gk20a_dbg_info("Init space for %s: va_limit=0x%llx, "
+		       "big_page_size=0x%x low_hole=0x%llx",
+		       name, aperture_size,
+		       (unsigned int)big_page_size, low_hole);
 
 	vm->mm = mm;
 
@@ -4393,9 +4410,11 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
 
 	/* Set up vma pointers. */
-	vm->vma[0] = &vm->user;
-	vm->vma[1] = &vm->user;
-	vm->vma[2] = &vm->kernel;
+	vm->vma[gmmu_page_size_small] = &vm->user;
+	vm->vma[gmmu_page_size_big] = &vm->user;
+	vm->vma[gmmu_page_size_kernel] = &vm->kernel;
+	if (!p->unify_address_spaces)
+		vm->vma[gmmu_page_size_big] = &vm->user_lp;
 
 	vm->va_start = low_hole;
 	vm->va_limit = aperture_size;
@@ -4403,27 +4422,51 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
 	vm->userspace_managed = userspace_managed;
-	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, big_page_size);
+	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
 
 	/* Initialize the page table data structures. */
 	err = init_vm_page_tables(vm);
 	if (err)
 		return err;
 
-	/* setup vma limits */
-	user_vma_start = low_hole;
-	user_vma_limit = vm->va_limit - kernel_reserved;
-
+	/* Setup vma limits. */
+	if (kernel_reserved + low_hole < aperture_size) {
+		if (p->unify_address_spaces) {
+			user_vma_start = low_hole;
+			user_vma_limit = vm->va_limit - kernel_reserved;
+			user_lp_vma_start = user_vma_limit;
+			user_lp_vma_limit = user_vma_limit;
+		} else {
+			user_vma_start = low_hole;
+			user_vma_limit = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_start = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_limit = vm->va_limit - kernel_reserved;
+		}
+	} else {
+		user_vma_start = 0;
+		user_vma_limit = 0;
+		user_lp_vma_start = 0;
+		user_lp_vma_limit = 0;
+	}
 	kernel_vma_start = vm->va_limit - kernel_reserved;
 	kernel_vma_limit = vm->va_limit;
 
-	gk20a_dbg_info(
-		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		user_vma_start, user_vma_limit,
-		kernel_vma_start, kernel_vma_limit);
+	gk20a_dbg_info("user_vma [0x%llx,0x%llx)",
+		       user_vma_start, user_vma_limit);
+	gk20a_dbg_info("user_lp_vma [0x%llx,0x%llx)",
+		       user_lp_vma_start, user_lp_vma_limit);
+	gk20a_dbg_info("kernel_vma [0x%llx,0x%llx)",
+		       kernel_vma_start, kernel_vma_limit);
+
+	if (WARN_ON(user_vma_start > user_vma_limit) ||
+	    WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
+	    WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
+		err = -EINVAL;
+		goto clean_up_page_tables;
+	}
 
-	WARN_ON(user_vma_start > user_vma_limit);
-	WARN_ON(kernel_vma_start >= kernel_vma_limit);
+	kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
+		0 : GPU_ALLOC_GVA_SPACE;
 
 	/*
 	 * A "user" area only makes sense for the GVA spaces. For VMs where
@@ -4437,12 +4480,22 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_page_tables;
 	}
 
+	/*
+	 * Determine if big pages are possible in this VM. If a split address
+	 * space is used then check the user_lp vma instead of the user vma.
+	 */
+	if (p->unify_address_spaces)
+		vm->big_pages = gk20a_big_pages_possible(vm, user_vma_start,
+					user_vma_limit - user_vma_start);
+	else
+		vm->big_pages = gk20a_big_pages_possible(vm, user_lp_vma_start,
+					user_lp_vma_limit - user_lp_vma_start);
+
+	/*
+	 * User VMA.
+	 */
 	if (user_vma_start < user_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
-		if (!gk20a_big_pages_possible(vm, user_vma_start,
-					      user_vma_limit - user_vma_start))
-			vm->big_pages = false;
-
 		err = __nvgpu_buddy_allocator_init(g, &vm->user,
 						   vm, alloc_name,
 						   user_vma_start,
@@ -4463,20 +4516,36 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		vm->vma[1] = &vm->kernel;
 	}
 
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
-	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
-				      kernel_vma_limit - kernel_vma_start))
-		vm->big_pages = false;
+	/*
+	 * User VMA for large pages when a split address range is used.
+	 */
+	if (user_lp_vma_start < user_lp_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
+		err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
+						   vm, alloc_name,
+						   user_lp_vma_start,
+						   user_lp_vma_limit -
+						   user_lp_vma_start,
+						   vm->big_page_size,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_allocators;
+	}
 
+	/*
+	 * Kernel VMA. Must always exist for an address space.
+	 */
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
 	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
 					   vm, alloc_name,
 					   kernel_vma_start,
 					   kernel_vma_limit - kernel_vma_start,
 					   SZ_4K,
 					   GPU_BALLOC_MAX_ORDER,
-					   GPU_ALLOC_GVA_SPACE);
+					   kernel_vma_flags);
 	if (err)
-		goto clean_up_user_allocator;
+		goto clean_up_allocators;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -4492,14 +4561,18 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (vm->va_limit > SZ_4G) {
 		err = gk20a_init_sema_pool(vm);
 		if (err)
-			goto clean_up_user_allocator;
+			goto clean_up_allocators;
 	}
 
 	return 0;
 
-clean_up_user_allocator:
-	if (user_vma_start < user_vma_limit)
+clean_up_allocators:
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
 		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
 	/* Cleans up init_vm_page_tables() */
 	vfree(vm->pdb.entries);
@@ -4871,6 +4944,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm)
 		nvgpu_alloc_destroy(&vm->kernel);
 	if (nvgpu_alloc_initialized(&vm->user))
 		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
@@ -5000,7 +5075,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	return gk20a_init_vm(mm, vm, big_page_size,
-			     SZ_4K * 16,
+			     big_page_size << 10,
 			     NV_MM_DEFAULT_KERNEL_SIZE,
 			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
 			     false, false, "cde");
@@ -5013,7 +5088,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	return gk20a_init_vm(mm, vm, big_page_size,
-			     SZ_4K * 16,
+			     big_page_size << 10,
 			     NV_MM_DEFAULT_KERNEL_SIZE,
 			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
 			     false, false, "ce");
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f362e27c..665aea42 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,9 +270,16 @@ struct vm_gk20a {
 
 	struct gk20a_mm_entry pdb;
 
+	/*
+	 * These structs define the address spaces. In some cases it's possible
+	 * to merge address spaces (user and user_lp) and in other cases it's
+	 * not. vma[] allows the code to be agnostic to this by always using
+	 * address spaces through this pointer array.
+	 */
 	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
 	struct nvgpu_allocator kernel;
 	struct nvgpu_allocator user;
+	struct nvgpu_allocator user_lp;
 
 	struct rb_root mapped_buffers;
 
@@ -433,41 +440,19 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 /* The default kernel-reserved GPU VA size */
 #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
 
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size);
-
 /*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is greater than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
+ * When not using unified address spaces the bottom 16GB of the space are used
+ * for small pages and the remaining high memory is used for large pages.
  */
-static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
-						  u64 base, u64 size)
+static inline u64 __nv_gmmu_va_small_page_limit(void)
 {
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
+	return ((u64)SZ_1G * 16);
 }
 
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size);
+enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
+
 /*
  * Buffer accessors - wrap between begin() and end() if there is no permanent
  * kernel mapping for this buffer.
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index e71dbf32..3c2db924 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -71,6 +71,9 @@ struct gk20a_platform {
 	/* set if ASPM should be disabled on boot; only makes sense for PCI */
 	bool disable_aspm;
 
+	/* Set if the platform can unify the small/large address spaces. */
+	bool unify_address_spaces;
+
 	/* Should be populated by probe. */
 	struct dentry *debugfs;
 	struct dentry *debugfs_alias;
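
As a companion to the __get_pte_size() comment block in the mm_gk20a.c hunk above, here is a condensed standalone sketch of the resulting page-size policy. The constants and names are illustrative stand-ins, not the driver's definitions, and the reserved-area lookup for fixed-offset maps is intentionally omitted:

/* Illustrative sketch only -- a simplified stand-in for __get_pte_size(). */
#include <stdbool.h>
#include <stdint.h>

#define SMALL_PAGE_LIMIT (16ULL << 30)	/* 16 GB split point, mirroring __nv_gmmu_va_small_page_limit() */
#define BIG_PAGE_SIZE    (128u << 10)	/* example big page size; the real value is per-platform */

enum pgsz { PGSZ_SMALL, PGSZ_BIG };

static enum pgsz pick_pte_size(bool big_pages, bool unified, uint64_t base,
			       uint64_t size)
{
	/* VMs without large-page support always use small pages. */
	if (!big_pages)
		return PGSZ_SMALL;

	if (!unified) {
		/* Split VA layout: the target address range decides. */
		if (base)
			return base < SMALL_PAGE_LIMIT ? PGSZ_SMALL : PGSZ_BIG;
		return size >= BIG_PAGE_SIZE ? PGSZ_BIG : PGSZ_SMALL;
	}

	/*
	 * Unified VA layout: a fixed-offset map (base != 0) consults the
	 * reserved VA area in the real driver; that lookup is omitted here
	 * and small pages are assumed as the fallback.
	 */
	if (base)
		return PGSZ_SMALL;

	return size >= BIG_PAGE_SIZE ? PGSZ_BIG : PGSZ_SMALL;
}

With unified address spaces the decision depends only on the buffer size (plus the reserved-area lookup for fixed maps), whereas the split layout makes the 16 GB boundary decisive.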