7 files changed, 121 insertions, 36 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 87b32add..b6b38541 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -38,7 +38,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 }
 int gk20a_as_alloc_share(struct gk20a_as *as,
-                         u32 flags, struct gk20a_as_share **out)
+                         u32 big_page_size, u32 flags,
+                         struct gk20a_as_share **out)
 {
        struct gk20a *g = gk20a_from_as(as);
        struct gk20a_as_share *as_share;
@@ -59,7 +60,7 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
        err = gk20a_busy(g->dev);
        if (err)
                goto failed;
-        err = g->ops.mm.vm_alloc_share(as_share, flags);
+        err = g->ops.mm.vm_alloc_share(as_share, big_page_size, flags);
        gk20a_idle(g->dev);
        if (err)
@@ -332,7 +333,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
        g = container_of(inode->i_cdev, struct gk20a, as.cdev);
-        err = gk20a_as_alloc_share(&g->as, 0, &as_share);
+        err = gk20a_as_alloc_share(&g->as, 0, 0, &as_share);
        if (err) {
                gk20a_dbg_fn("failed to alloc share");
                return err;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
index 166000a8..d347479e 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -1,7 +1,7 @@
 /*
 * GK20A Address Spaces
 *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -42,7 +42,9 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
 int gk20a_as_dev_open(struct inode *inode, struct file *filp);
 int gk20a_as_dev_release(struct inode *inode, struct file *filp);
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int gk20a_as_alloc_share(struct gk20a_as *as,
+/* if big_page_size == 0, the default big page size is used */
+int gk20a_as_alloc_share(struct gk20a_as *as, u32 big_page_size,
                         u32 flags, struct gk20a_as_share **out);
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 0b6b5913..6dc92713 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -166,7 +166,8 @@ static int gk20a_ctrl_alloc_as(
                goto clean_up;
        }
-        err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
+        err = gk20a_as_alloc_share(&g->as, args->big_page_size, args->flags,
+                                   &as_share);
        if (err)
                goto clean_up_file;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0d74099e..a97ec735 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1993,6 +1993,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
            gk20a_platform_has_syncpoints(g->dev))
                gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
+        gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
        gpu->gpc_mask = 1;
        g->ops.gr.detect_sm_arch(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 3542a597..ff37039f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -370,7 +370,7 @@ struct gpu_ops {
                                struct vm_gk20a_mapping_batch *batch);
                void (*vm_remove)(struct vm_gk20a *vm);
                int (*vm_alloc_share)(struct gk20a_as_share *as_share,
-                                      u32 flags);
+                                      u32 big_page_size, u32 flags);
                int (*vm_bind_channel)(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch);
                int (*fb_flush)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 141a37af..a9bca317 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
        struct rb_node *node;
        int i = 0;
+        if (vm->userspace_managed) {
+                *mapped_buffers = NULL;
+                *num_buffers = 0;
+                return 0;
+        }
        mutex_lock(&vm->update_gmmu_lock);
        buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 static int validate_fixed_buffer(struct vm_gk20a *vm,
                                 struct buffer_attrs *bfr,
-                                 u64 map_offset, u64 map_size)
+                                 u64 map_offset, u64 map_size,
+                                 struct vm_reserved_va_node **pva_node)
 {
        struct device *dev = dev_from_vm(vm);
        struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
                return -EINVAL;
        }
-        /* find the space reservation */
+        /* Find the space reservation, but it's ok to have none for
+         * userspace-managed address spaces */
        va_node = addr_to_reservation(vm, map_offset);
-        if (!va_node) {
+        if (!va_node && !vm->userspace_managed) {
                gk20a_warn(dev, "fixed offset mapping without space allocation");
                return -EINVAL;
        }
-        /* mapped area should fit inside va */
+        /* Mapped area should fit inside va, if there's one */
-        if (map_end > va_node->vaddr_start + va_node->size) {
+        if (va_node && map_end > va_node->vaddr_start + va_node->size) {
                gk20a_warn(dev, "fixed offset mapping size overflows va node");
                return -EINVAL;
        }
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
                return -EINVAL;
        }
+        *pva_node = va_node;
        return 0;
 }
@@ -1411,16 +1421,28 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        u64 buf_addr;
        u64 ctag_map_win_size = 0;
        u32 ctag_map_win_ctagline = 0;
+        struct vm_reserved_va_node *va_node = NULL;
+        if (user_mapped && vm->userspace_managed &&
+            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+                gk20a_err(d,
+                          "%s: non-fixed-offset mapping not available on userspace managed address spaces",
+                          __func__);
+                return -EFAULT;
+        }
        mutex_lock(&vm->update_gmmu_lock);
        /* check if this buffer is already mapped */
-        map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
+        if (!vm->userspace_managed) {
-                                                   flags, kind, sgt,
+                map_offset = gk20a_vm_map_duplicate_locked(
-                                                   user_mapped, rw_flag);
+                        vm, dmabuf, offset_align,
-        if (map_offset) {
+                        flags, kind, sgt,
-                mutex_unlock(&vm->update_gmmu_lock);
+                        user_mapped, rw_flag);
-                return map_offset;
+                if (map_offset) {
+                        mutex_unlock(&vm->update_gmmu_lock);
+                        return map_offset;
+                }
        }
        /* pin buffer to get phys/iovmm addr */
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
                err = validate_fixed_buffer(vm, &bfr,
-                        offset_align, mapping_size);
+                                            offset_align, mapping_size,
+                                            &va_node);
                if (err)
                        goto clean_up;
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
-        if (!va_allocated) {
+        if (va_node) {
-                struct vm_reserved_va_node *va_node;
-                /* find the space reservation */
-                va_node = addr_to_reservation(vm, map_offset);
                list_add_tail(&mapped_buffer->va_buffers_list,
                              &va_node->va_buffers_list);
                mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
        struct mapped_buffer_node *mapped_buffer;
        struct gk20a *g = gk20a_from_vm(vm);
        struct device *d = dev_from_vm(vm);
+        const bool fixed_mapping =
+                (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
+        if (vm->userspace_managed && !fixed_mapping) {
+                gk20a_err(d,
+                          "%s: non-fixed-offset mapping is not available on userspace managed address spaces",
+                          __func__);
+                return -EFAULT;
+        }
-        if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+        if (fixed_mapping && !vm->userspace_managed) {
-                /* This will be implemented later */
                gk20a_err(d,
-                          "%s: fixed-offset compbits mapping not yet supported",
+                          "%s: fixed-offset mapping is available only on userspace managed address spaces",
                          __func__);
                return -EFAULT;
        }
        mutex_lock(&vm->update_gmmu_lock);
-        mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+        mapped_buffer =
+                find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
        if (!mapped_buffer || !mapped_buffer->user_mapped) {
                mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
        if (!mapped_buffer->ctags_mappable) {
                mutex_unlock(&vm->update_gmmu_lock);
-                gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+                gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
+                          __func__, mapping_gva);
                return -EFAULT;
        }
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
                cacheline_offset_start =
                        cacheline_start * aggregate_cacheline_sz;
+                if (fixed_mapping) {
+                        struct buffer_attrs bfr;
+                        int err;
+                        struct vm_reserved_va_node *va_node = NULL;
+                        memset(&bfr, 0, sizeof(bfr));
+                        bfr.pgsz_idx = small_pgsz_index;
+                        err = validate_fixed_buffer(
+                                vm, &bfr, *compbits_win_gva,
+                                mapped_buffer->ctag_map_win_size, &va_node);
+                        if (err) {
+                                mutex_unlock(&vm->update_gmmu_lock);
+                                return err;
+                        }
+                        if (va_node) {
+                                /* this would create a dangling GPU VA
+                                 * pointer if the space is freed
+                                 * before the buffer is
+                                 * unmapped */
+                                mutex_unlock(&vm->update_gmmu_lock);
+                                gk20a_err(d,
+                                          "%s: comptags cannot be mapped into allocated space",
+                                          __func__);
+                                return -EINVAL;
+                        }
+                }
                mapped_buffer->ctag_map_win_addr =
                        g->ops.mm.gmmu_map(
                                vm,
-                                0,
+                                !fixed_mapping ? 0 : *compbits_win_gva, /* va */
                                g->gr.compbit_store.mem.sgt,
                                cacheline_offset_start, /* sg offset */
                                mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
                                  __func__, mapping_gva);
                        return -ENOMEM;
                }
+        } else if (fixed_mapping && *compbits_win_gva &&
+                   mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
+                mutex_unlock(&vm->update_gmmu_lock);
+                gk20a_err(d,
+                          "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
+                          "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
+                          __func__, mapping_gva,
+                          mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
+                return -EINVAL;
        }
        *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                u64 kernel_reserved,
                u64 aperture_size,
                bool big_pages,
+                bool userspace_managed,
                char *name)
 {
        int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
        vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
+        vm->userspace_managed = userspace_managed;
        vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
                        vm->big_page_size);
@@ -2821,7 +2893,8 @@ clean_up_pdes:
 }
 /* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
+                         u32 flags)
 {
        struct gk20a_as *as = as_share->as;
        struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
        struct vm_gk20a *vm;
        char name[32];
        int err;
+        const bool userspace_managed =
+                (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
        gk20a_dbg_fn("");
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
        err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
                            mm->channel.kernel_size,
                            mm->channel.user_size + mm->channel.kernel_size,
-                            !mm->disable_bigpage, name);
+                            !mm->disable_bigpage, userspace_managed, name);
        return err;
 }
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
        gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
        gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
                      mm->bar1.aperture_size - SZ_4K,
-                      mm->bar1.aperture_size, false, "bar1");
+                      mm->bar1.aperture_size, false, false, "bar1");
        err = gk20a_alloc_inst_block(g, inst_block);
        if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
        gk20a_init_vm(mm, vm, big_page_size,
                      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-                      GK20A_PMU_VA_SIZE * 2, false,
+                      GK20A_PMU_VA_SIZE * 2, false, false,
                      "system");
        err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
                        SZ_4K * 16,
                        NV_MM_DEFAULT_KERNEL_SIZE,
                        NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-                        false, "cde");
+                        false, false, "cde");
 }
 void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7be4383b..2dd4ccf5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -251,6 +251,8 @@ struct vm_gk20a {
        u32 big_page_size;
+        bool userspace_managed;
        const struct gk20a_mmu_level *mmu_levels;
        struct kref ref;
@@ -586,7 +588,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
+                         u32 flags);
 int gk20a_vm_release_share(struct gk20a_as_share *as_share);
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                         struct nvgpu_as_alloc_space_args *args);
@@ -621,6 +624,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                u64 kernel_reserved,
                u64 aperture_size,
                bool big_pages,
+                bool userspace_managed,
                char *name);
 void gk20a_deinit_vm(struct vm_gk20a *vm);

diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 87b32add..b6b38541 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -38,7 +38,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
38	}	38	}
39		39
40	int gk20a_as_alloc_share(struct gk20a_as *as,	40	int gk20a_as_alloc_share(struct gk20a_as *as,
41	u32 flags, struct gk20a_as_share **out)	41	u32 big_page_size, u32 flags,
		42	struct gk20a_as_share **out)
42	{	43	{
43	struct gk20a *g = gk20a_from_as(as);	44	struct gk20a *g = gk20a_from_as(as);
44	struct gk20a_as_share *as_share;	45	struct gk20a_as_share *as_share;
@@ -59,7 +60,7 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
59	err = gk20a_busy(g->dev);	60	err = gk20a_busy(g->dev);
60	if (err)	61	if (err)
61	goto failed;	62	goto failed;
62	err = g->ops.mm.vm_alloc_share(as_share, flags);	63	err = g->ops.mm.vm_alloc_share(as_share, big_page_size, flags);
63	gk20a_idle(g->dev);	64	gk20a_idle(g->dev);
64		65
65	if (err)	66	if (err)
@@ -332,7 +333,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
332		333
333	g = container_of(inode->i_cdev, struct gk20a, as.cdev);	334	g = container_of(inode->i_cdev, struct gk20a, as.cdev);
334		335
335	err = gk20a_as_alloc_share(&g->as, 0, &as_share);	336	err = gk20a_as_alloc_share(&g->as, 0, 0, &as_share);
336	if (err) {	337	if (err) {
337	gk20a_dbg_fn("failed to alloc share");	338	gk20a_dbg_fn("failed to alloc share");
338	return err;	339	return err;


diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h index 166000a8..d347479e 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* GK20A Address Spaces	2	* GK20A Address Spaces
3	*	3	*
4	* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.	4	* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
5	*	5	*
6	* This program is free software; you can redistribute it and/or modify it	6	* This program is free software; you can redistribute it and/or modify it
7	* under the terms and conditions of the GNU General Public License,	7	* under the terms and conditions of the GNU General Public License,
@@ -42,7 +42,9 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
42	int gk20a_as_dev_open(struct inode *inode, struct file *filp);	42	int gk20a_as_dev_open(struct inode *inode, struct file *filp);
43	int gk20a_as_dev_release(struct inode *inode, struct file *filp);	43	int gk20a_as_dev_release(struct inode *inode, struct file *filp);
44	long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);	44	long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
45	int gk20a_as_alloc_share(struct gk20a_as *as,	45
		46	/* if big_page_size == 0, the default big page size is used */
		47	int gk20a_as_alloc_share(struct gk20a_as *as, u32 big_page_size,
46	u32 flags, struct gk20a_as_share **out);	48	u32 flags, struct gk20a_as_share **out);
47		49
48	#endif	50	#endif


diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 0b6b5913..6dc92713 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -166,7 +166,8 @@ static int gk20a_ctrl_alloc_as(
166	goto clean_up;	166	goto clean_up;
167	}	167	}
168		168
169	err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);	169	err = gk20a_as_alloc_share(&g->as, args->big_page_size, args->flags,
		170	&as_share);
170	if (err)	171	if (err)
171	goto clean_up_file;	172	goto clean_up_file;
172		173


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 0d74099e..a97ec735 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1993,6 +1993,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
1993	gk20a_platform_has_syncpoints(g->dev))	1993	gk20a_platform_has_syncpoints(g->dev))
1994	gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;	1994	gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
1995		1995
		1996	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
		1997
1996	gpu->gpc_mask = 1;	1998	gpu->gpc_mask = 1;
1997		1999
1998	g->ops.gr.detect_sm_arch(g);	2000	g->ops.gr.detect_sm_arch(g);


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 3542a597..ff37039f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -370,7 +370,7 @@ struct gpu_ops {
370	struct vm_gk20a_mapping_batch *batch);	370	struct vm_gk20a_mapping_batch *batch);
371	void (*vm_remove)(struct vm_gk20a *vm);	371	void (*vm_remove)(struct vm_gk20a *vm);
372	int (*vm_alloc_share)(struct gk20a_as_share *as_share,	372	int (*vm_alloc_share)(struct gk20a_as_share *as_share,
373	u32 flags);	373	u32 big_page_size, u32 flags);
374	int (*vm_bind_channel)(struct gk20a_as_share *as_share,	374	int (*vm_bind_channel)(struct gk20a_as_share *as_share,
375	struct channel_gk20a *ch);	375	struct channel_gk20a *ch);
376	int (*fb_flush)(struct gk20a *g);	376	int (*fb_flush)(struct gk20a *g);


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 141a37af..a9bca317 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
767	struct rb_node *node;	767	struct rb_node *node;
768	int i = 0;	768	int i = 0;
769		769
		770	if (vm->userspace_managed) {
		771	*mapped_buffers = NULL;
		772	*num_buffers = 0;
		773	return 0;
		774	}
		775
770	mutex_lock(&vm->update_gmmu_lock);	776	mutex_lock(&vm->update_gmmu_lock);
771		777
772	buffer_list = nvgpu_alloc(sizeof(*buffer_list) *	778	buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
1135		1141
1136	static int validate_fixed_buffer(struct vm_gk20a *vm,	1142	static int validate_fixed_buffer(struct vm_gk20a *vm,
1137	struct buffer_attrs *bfr,	1143	struct buffer_attrs *bfr,
1138	u64 map_offset, u64 map_size)	1144	u64 map_offset, u64 map_size,
		1145	struct vm_reserved_va_node **pva_node)
1139	{	1146	{
1140	struct device *dev = dev_from_vm(vm);	1147	struct device *dev = dev_from_vm(vm);
1141	struct vm_reserved_va_node *va_node;	1148	struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
1154	return -EINVAL;	1161	return -EINVAL;
1155	}	1162	}
1156		1163
1157	/* find the space reservation */	1164	/* Find the space reservation, but it's ok to have none for
		1165	* userspace-managed address spaces */
1158	va_node = addr_to_reservation(vm, map_offset);	1166	va_node = addr_to_reservation(vm, map_offset);
1159	if (!va_node) {	1167	if (!va_node && !vm->userspace_managed) {
1160	gk20a_warn(dev, "fixed offset mapping without space allocation");	1168	gk20a_warn(dev, "fixed offset mapping without space allocation");
1161	return -EINVAL;	1169	return -EINVAL;
1162	}	1170	}
1163		1171
1164	/* mapped area should fit inside va */	1172	/* Mapped area should fit inside va, if there's one */
1165	if (map_end > va_node->vaddr_start + va_node->size) {	1173	if (va_node && map_end > va_node->vaddr_start + va_node->size) {
1166	gk20a_warn(dev, "fixed offset mapping size overflows va node");	1174	gk20a_warn(dev, "fixed offset mapping size overflows va node");
1167	return -EINVAL;	1175	return -EINVAL;
1168	}	1176	}
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
1177	return -EINVAL;	1185	return -EINVAL;
1178	}	1186	}
1179		1187
		1188	*pva_node = va_node;
		1189
1180	return 0;	1190	return 0;
1181	}	1191	}
1182		1192
@@ -1411,16 +1421,28 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1411	u64 buf_addr;	1421	u64 buf_addr;
1412	u64 ctag_map_win_size = 0;	1422	u64 ctag_map_win_size = 0;
1413	u32 ctag_map_win_ctagline = 0;	1423	u32 ctag_map_win_ctagline = 0;
		1424	struct vm_reserved_va_node *va_node = NULL;
		1425
		1426	if (user_mapped && vm->userspace_managed &&
		1427	!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
		1428	gk20a_err(d,
		1429	"%s: non-fixed-offset mapping not available on userspace managed address spaces",
		1430	__func__);
		1431	return -EFAULT;
		1432	}
1414		1433
1415	mutex_lock(&vm->update_gmmu_lock);	1434	mutex_lock(&vm->update_gmmu_lock);
1416		1435
1417	/* check if this buffer is already mapped */	1436	/* check if this buffer is already mapped */
1418	map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,	1437	if (!vm->userspace_managed) {
1419	flags, kind, sgt,	1438	map_offset = gk20a_vm_map_duplicate_locked(
1420	user_mapped, rw_flag);	1439	vm, dmabuf, offset_align,
1421	if (map_offset) {	1440	flags, kind, sgt,
1422	mutex_unlock(&vm->update_gmmu_lock);	1441	user_mapped, rw_flag);
1423	return map_offset;	1442	if (map_offset) {
		1443	mutex_unlock(&vm->update_gmmu_lock);
		1444	return map_offset;
		1445	}
1424	}	1446	}
1425		1447
1426	/* pin buffer to get phys/iovmm addr */	1448	/* pin buffer to get phys/iovmm addr */
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1504		1526
1505	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {	1527	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1506	err = validate_fixed_buffer(vm, &bfr,	1528	err = validate_fixed_buffer(vm, &bfr,
1507	offset_align, mapping_size);	1529	offset_align, mapping_size,
		1530	&va_node);
1508	if (err)	1531	if (err)
1509	goto clean_up;	1532	goto clean_up;
1510		1533
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1671		1694
1672	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);	1695	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1673		1696
1674	if (!va_allocated) {	1697	if (va_node) {
1675	struct vm_reserved_va_node *va_node;
1676
1677	/* find the space reservation */
1678	va_node = addr_to_reservation(vm, map_offset);
1679	list_add_tail(&mapped_buffer->va_buffers_list,	1698	list_add_tail(&mapped_buffer->va_buffers_list,
1680	&va_node->va_buffers_list);	1699	&va_node->va_buffers_list);
1681	mapped_buffer->va_node = va_node;	1700	mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
1753	struct mapped_buffer_node *mapped_buffer;	1772	struct mapped_buffer_node *mapped_buffer;
1754	struct gk20a *g = gk20a_from_vm(vm);	1773	struct gk20a *g = gk20a_from_vm(vm);
1755	struct device *d = dev_from_vm(vm);	1774	struct device *d = dev_from_vm(vm);
		1775	const bool fixed_mapping =
		1776	(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
		1777
		1778	if (vm->userspace_managed && !fixed_mapping) {
		1779	gk20a_err(d,
		1780	"%s: non-fixed-offset mapping is not available on userspace managed address spaces",
		1781	__func__);
		1782	return -EFAULT;
		1783	}
1756		1784
1757	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {	1785	if (fixed_mapping && !vm->userspace_managed) {
1758	/* This will be implemented later */
1759	gk20a_err(d,	1786	gk20a_err(d,
1760	"%s: fixed-offset compbits mapping not yet supported",	1787	"%s: fixed-offset mapping is available only on userspace managed address spaces",
1761	__func__);	1788	__func__);
1762	return -EFAULT;	1789	return -EFAULT;
1763	}	1790	}
1764		1791
1765	mutex_lock(&vm->update_gmmu_lock);	1792	mutex_lock(&vm->update_gmmu_lock);
1766		1793
1767	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);	1794	mapped_buffer =
		1795	find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
1768		1796
1769	if (!mapped_buffer || !mapped_buffer->user_mapped) {	1797	if (!mapped_buffer || !mapped_buffer->user_mapped) {
1770	mutex_unlock(&vm->update_gmmu_lock);	1798	mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
1774		1802
1775	if (!mapped_buffer->ctags_mappable) {	1803	if (!mapped_buffer->ctags_mappable) {
1776	mutex_unlock(&vm->update_gmmu_lock);	1804	mutex_unlock(&vm->update_gmmu_lock);
1777	gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);	1805	gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
		1806	__func__, mapping_gva);
1778	return -EFAULT;	1807	return -EFAULT;
1779	}	1808	}
1780		1809
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
1804	cacheline_offset_start =	1833	cacheline_offset_start =
1805	cacheline_start * aggregate_cacheline_sz;	1834	cacheline_start * aggregate_cacheline_sz;
1806		1835
		1836	if (fixed_mapping) {
		1837	struct buffer_attrs bfr;
		1838	int err;
		1839	struct vm_reserved_va_node *va_node = NULL;
		1840
		1841	memset(&bfr, 0, sizeof(bfr));
		1842
		1843	bfr.pgsz_idx = small_pgsz_index;
		1844
		1845	err = validate_fixed_buffer(
		1846	vm, &bfr, *compbits_win_gva,
		1847	mapped_buffer->ctag_map_win_size, &va_node);
		1848
		1849	if (err) {
		1850	mutex_unlock(&vm->update_gmmu_lock);
		1851	return err;
		1852	}
		1853
		1854	if (va_node) {
		1855	/* this would create a dangling GPU VA
		1856	* pointer if the space is freed
		1857	* before the buffer is
		1858	* unmapped */
		1859	mutex_unlock(&vm->update_gmmu_lock);
		1860	gk20a_err(d,
		1861	"%s: comptags cannot be mapped into allocated space",
		1862	__func__);
		1863	return -EINVAL;
		1864	}
		1865	}
		1866
1807	mapped_buffer->ctag_map_win_addr =	1867	mapped_buffer->ctag_map_win_addr =
1808	g->ops.mm.gmmu_map(	1868	g->ops.mm.gmmu_map(
1809	vm,	1869	vm,
1810	0,	1870	!fixed_mapping ? 0 : *compbits_win_gva, /* va */
1811	g->gr.compbit_store.mem.sgt,	1871	g->gr.compbit_store.mem.sgt,
1812	cacheline_offset_start, /* sg offset */	1872	cacheline_offset_start, /* sg offset */
1813	mapped_buffer->ctag_map_win_size, /* size */	1873	mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
1828	__func__, mapping_gva);	1888	__func__, mapping_gva);
1829	return -ENOMEM;	1889	return -ENOMEM;
1830	}	1890	}
		1891	} else if (fixed_mapping && *compbits_win_gva &&
		1892	mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
		1893	mutex_unlock(&vm->update_gmmu_lock);
		1894	gk20a_err(d,
		1895	"%s: re-requesting comptags map into mismatching address. buffer offset 0x"
		1896	"%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
		1897	__func__, mapping_gva,
		1898	mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
		1899	return -EINVAL;
1831	}	1900	}
1832		1901
1833	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);	1902	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2662	u64 kernel_reserved,	2731	u64 kernel_reserved,
2663	u64 aperture_size,	2732	u64 aperture_size,
2664	bool big_pages,	2733	bool big_pages,
		2734	bool userspace_managed,
2665	char *name)	2735	char *name)
2666	{	2736	{
2667	int err, i;	2737	int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2685		2755
2686	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];	2756	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
2687		2757
		2758	vm->userspace_managed = userspace_managed;
		2759
2688	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,	2760	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
2689	vm->big_page_size);	2761	vm->big_page_size);
2690		2762
@@ -2821,7 +2893,8 @@ clean_up_pdes:
2821	}	2893	}
2822		2894
2823	/* address space interfaces for the gk20a module */	2895	/* address space interfaces for the gk20a module */
2824	int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)	2896	int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
		2897	u32 flags)
2825	{	2898	{
2826	struct gk20a_as *as = as_share->as;	2899	struct gk20a_as *as = as_share->as;
2827	struct gk20a *g = gk20a_from_as(as);	2900	struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
2829	struct vm_gk20a *vm;	2902	struct vm_gk20a *vm;
2830	char name[32];	2903	char name[32];
2831	int err;	2904	int err;
		2905	const bool userspace_managed =
		2906	(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
2832		2907
2833	gk20a_dbg_fn("");	2908	gk20a_dbg_fn("");
2834		2909
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
2856	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,	2931	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
2857	mm->channel.kernel_size,	2932	mm->channel.kernel_size,
2858	mm->channel.user_size + mm->channel.kernel_size,	2933	mm->channel.user_size + mm->channel.kernel_size,
2859	!mm->disable_bigpage, name);	2934	!mm->disable_bigpage, userspace_managed, name);
2860		2935
2861	return err;	2936	return err;
2862	}	2937	}
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
3235	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);	3310	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
3236	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,	3311	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
3237	mm->bar1.aperture_size - SZ_4K,	3312	mm->bar1.aperture_size - SZ_4K,
3238	mm->bar1.aperture_size, false, "bar1");	3313	mm->bar1.aperture_size, false, false, "bar1");
3239		3314
3240	err = gk20a_alloc_inst_block(g, inst_block);	3315	err = gk20a_alloc_inst_block(g, inst_block);
3241	if (err)	3316	if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
3263		3338
3264	gk20a_init_vm(mm, vm, big_page_size,	3339	gk20a_init_vm(mm, vm, big_page_size,
3265	SZ_4K * 16, GK20A_PMU_VA_SIZE,	3340	SZ_4K * 16, GK20A_PMU_VA_SIZE,
3266	GK20A_PMU_VA_SIZE * 2, false,	3341	GK20A_PMU_VA_SIZE * 2, false, false,
3267	"system");	3342	"system");
3268		3343
3269	err = gk20a_alloc_inst_block(g, inst_block);	3344	err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
3303	SZ_4K * 16,	3378	SZ_4K * 16,
3304	NV_MM_DEFAULT_KERNEL_SIZE,	3379	NV_MM_DEFAULT_KERNEL_SIZE,
3305	NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,	3380	NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
3306	false, "cde");	3381	false, false, "cde");
3307	}	3382	}
3308		3383
3309	void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)	3384	void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7be4383b..2dd4ccf5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -251,6 +251,8 @@ struct vm_gk20a {
251		251
252	u32 big_page_size;	252	u32 big_page_size;
253		253
		254	bool userspace_managed;
		255
254	const struct gk20a_mmu_level *mmu_levels;	256	const struct gk20a_mmu_level *mmu_levels;
255		257
256	struct kref ref;	258	struct kref ref;
@@ -586,7 +588,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
586	/* vm-as interface */	588	/* vm-as interface */
587	struct nvgpu_as_alloc_space_args;	589	struct nvgpu_as_alloc_space_args;
588	struct nvgpu_as_free_space_args;	590	struct nvgpu_as_free_space_args;
589	int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);	591	int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
		592	u32 flags);
590	int gk20a_vm_release_share(struct gk20a_as_share *as_share);	593	int gk20a_vm_release_share(struct gk20a_as_share *as_share);
591	int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,	594	int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
592	struct nvgpu_as_alloc_space_args *args);	595	struct nvgpu_as_alloc_space_args *args);
@@ -621,6 +624,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
621	u64 kernel_reserved,	624	u64 kernel_reserved,
622	u64 aperture_size,	625	u64 aperture_size,
623	bool big_pages,	626	bool big_pages,
		627	bool userspace_managed,
624	char *name);	628	char *name);
625	void gk20a_deinit_vm(struct vm_gk20a *vm);	629	void gk20a_deinit_vm(struct vm_gk20a *vm);
626		630