author    Konsta Holtta <kholtta@nvidia.com>    2016-07-15 08:52:52 -0400
committer Vijayakumar Subbu <vsubbu@nvidia.com> 2016-07-21 08:55:26 -0400
commit    13231006671a1da11cfaf7a67e69430199820788 (patch)
tree      6b4053838d672b158fe636b60768240585a21eb0
parent    83071083d779b67ad73172675a6dfa34ed19b414 (diff)
gpu: nvgpu: add vidmem allocation ioctl
Add NVGPU_GPU_IOCTL_ALLOC_VIDMEM to the ctrl fd for letting userspace
allocate on-board GPU memory (aka vidmem). The allocations are returned
as dmabuf fds. Also, report the amount of local video memory in the gpu
characteristics.

Jira DNVGPU-19
Jira DNVGPU-38

Change-Id: I28e361d31bb630b96d06bb1c86d022d91c7592bc
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1181152
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c   48
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c         1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c    148
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h      2
-rw-r--r--  include/uapi/linux/nvgpu.h             73
5 files changed, 271 insertions(+), 1 deletion(-)
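For context, a minimal userspace sketch of the new ioctl. This is hedged: the device path, helper name, and sizes below are illustrative assumptions, not part of this change.

  /* Illustrative usage of NVGPU_GPU_IOCTL_ALLOC_VIDMEM; the ctrl device
   * path below is an assumption, and error handling is abbreviated. */
  #include <fcntl.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/nvgpu.h>

  static int alloc_vidmem(int ctrl_fd, __u64 size)
  {
          struct nvgpu_gpu_alloc_vidmem_args args;

          memset(&args, 0, sizeof(args));
          args.in.size = size;      /* must be a multiple of small page size (4K) */
          args.in.alignment = 0;    /* 0 selects the minimum (small page) alignment */

          if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args) < 0)
                  return -1;
          return args.out.dmabuf_fd; /* caller owns the fd; close() drops the ref */
  }

  /* e.g.: int fd = alloc_vidmem(open("/dev/nvhost-ctrl-gpu", O_RDWR), 64 << 20); */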
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 3e34b6b8..6b832670 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -704,6 +704,49 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
+		struct nvgpu_gpu_alloc_vidmem_args *args)
+{
+	u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
+		return -EINVAL;
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
+		return -EINVAL;
+
+	if (args->in.size & (SZ_4K - 1))
+		return -EINVAL;
+
+	if (!args->in.size)
+		return -EINVAL;
+
+	if (align & (align - 1))
+		return -EINVAL;
+
+	if (align > roundup_pow_of_two(args->in.size)) {
+		/* log this special case, buddy allocator detail */
+		gk20a_warn(dev_from_gk20a(g),
+			"alignment larger than buffer size rounded up to power of 2 is not supported");
+		return -EINVAL;
+	}
+
+	fd = gk20a_vidmem_buf_alloc(g, args->in.size);
+	if (fd < 0)
+		return fd;
+
+	args->out.dmabuf_fd = fd;
+
+	gk20a_dbg_fn("done, fd=%d", fd);
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct device *dev = filp->private_data;
@@ -951,6 +994,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			(struct nvgpu_gpu_get_engine_info_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
+		err = nvgpu_gpu_alloc_vidmem(g,
+			(struct nvgpu_gpu_alloc_vidmem_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
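Note on the last validation check above: the buddy allocator cannot honor an alignment larger than the request size rounded up to the next power of two. As a worked example (illustrative numbers), a 3 MiB request rounds up to 4 MiB, so alignment values up to 4 MiB pass while an 8 MiB alignment is rejected with -EINVAL.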
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 04f82033..bb8cb33f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2148,6 +2148,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->default_compute_preempt_mode =
 		g->gr.preemption_mode_rec.default_compute_preempt_mode;
 
+	gpu->local_video_memory_size = g->mm.vidmem.size;
 
 	return 0;
 }
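Userspace can read the new field back through the existing characteristics query. A hedged sketch, assuming the pre-existing NVGPU_GPU_IOCTL_GET_CHARACTERISTICS flow (not part of this change):

  struct nvgpu_gpu_characteristics ch;
  struct nvgpu_gpu_get_characteristics req;

  memset(&ch, 0, sizeof(ch));
  memset(&req, 0, sizeof(req));
  req.gpu_characteristics_buf_size = sizeof(ch);
  req.gpu_characteristics_buf_addr = (__u64)(uintptr_t)&ch;

  if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0)
          printf("local vidmem: %llu bytes\n",
                 (unsigned long long)ch.local_video_memory_size);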
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index bf3d990c..2dcc4363 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -411,6 +411,14 @@ struct gk20a_dmabuf_priv {
 	u64 buffer_id;
 };
 
+struct gk20a_vidmem_buf {
+	struct gk20a *g;
+	struct mem_desc mem;
+	struct dma_buf *dmabuf;
+	void *dmabuf_priv;
+	void (*dmabuf_priv_delete)(void *);
+};
+
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
@@ -1833,6 +1841,146 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	return mapped_buffer->addr;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static struct sg_table *gk20a_vidbuf_map_dma_buf(
+	struct dma_buf_attachment *attach, enum dma_data_direction dir)
+{
+	struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
+
+	return buf->mem.sgt;
+}
+
+static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
+				       struct sg_table *sgt,
+				       enum dma_data_direction dir)
+{
+}
+
+static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	gk20a_dbg_fn("");
+
+	if (buf->dmabuf_priv)
+		buf->dmabuf_priv_delete(buf->dmabuf_priv);
+
+	gk20a_gmmu_free(buf->g, &buf->mem);
+	kfree(buf);
+}
+
+static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
+				      unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
+	struct device *dev, void *priv, void (*delete)(void *priv))
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	buf->dmabuf_priv = priv;
+	buf->dmabuf_priv_delete = delete;
+
+	return 0;
+}
+
+static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
+	struct device *dev)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	return buf->dmabuf_priv;
+}
+
+static const struct dma_buf_ops gk20a_vidbuf_ops = {
+	.map_dma_buf = gk20a_vidbuf_map_dma_buf,
+	.unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
+	.release = gk20a_vidbuf_release,
+	.kmap_atomic = gk20a_vidbuf_kmap_atomic,
+	.kmap = gk20a_vidbuf_kmap,
+	.mmap = gk20a_vidbuf_mmap,
+	.set_drvdata = gk20a_vidbuf_set_private,
+	.get_drvdata = gk20a_vidbuf_get_private,
+};
+
+static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.priv = buf;
+	exp_info.ops = &gk20a_vidbuf_ops;
+	exp_info.size = buf->mem.size;
+	exp_info.flags = O_RDWR;
+
+	return dma_buf_export(&exp_info);
+#else
+	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+			      O_RDWR, NULL);
+#endif
+}
+#endif
+
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	struct gk20a_vidmem_buf *buf;
+	int err, fd;
+
+	gk20a_dbg_fn("");
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->g = g;
+
+	err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
+	if (err)
+		goto err_kfree;
+
+	buf->dmabuf = gk20a_vidbuf_export(buf);
+	if (IS_ERR(buf->dmabuf)) {
+		err = PTR_ERR(buf->dmabuf);
+		goto err_bfree;
+	}
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0) {
+		/* ->release frees what we have done */
+		dma_buf_put(buf->dmabuf);
+		return fd;
+	}
+
+	/* fclose() on this drops one ref, freeing the dma buf */
+	fd_install(fd, buf->dmabuf->file);
+
+	return fd;
+
+err_bfree:
+	gk20a_gmmu_free(g, &buf->mem);
+err_kfree:
+	kfree(buf);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 u64 gk20a_vm_map(struct vm_gk20a *vm,
 		 struct dma_buf *dmabuf,
 		 u64 offset_align,
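Note the ownership handoff in gk20a_vidmem_buf_alloc() above: until dma_buf_export() succeeds, failures are unwound manually through the err_bfree/err_kfree labels; once the export succeeds, the dma_buf owns the allocation, so the fd-allocation error path only calls dma_buf_put(), and the actual cleanup (freeing the vidmem and the wrapper struct) happens in gk20a_vidbuf_release() when the last reference, including the installed fd, is dropped.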
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d7503948..5f0ce657 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -718,6 +718,8 @@ void gk20a_vm_mapping_batch_finish_locked(
 	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
 
 
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
+
 /* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 		int dmabuf_fd,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index aa950dfa..66ea05b3 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -196,6 +196,8 @@ struct nvgpu_gpu_characteristics {
 	__u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */
 	__u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */
 
+	__u64 local_video_memory_size; /* in bytes, non-zero only for dGPUs */
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -434,6 +436,72 @@ struct nvgpu_gpu_get_engine_info_args {
 	__u64 engine_info_buf_addr;
 };
 
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CONTIGUOUS		(1U << 0)
+
+/* CPU access and coherency flags (3 bits). Use CPU access with care,
+ * BAR resources are scarce. */
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_NOT_MAPPABLE	(0U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_WRITE_COMBINE	(1U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_CACHED		(2U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK		(7U << 1)
+
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR			(1U << 4)
+
+/* Allocation of device-specific local video memory. Returns dmabuf fd
+ * on success. */
+struct nvgpu_gpu_alloc_vidmem_args {
+	union {
+		struct {
+			/* Size for allocation. Must be a multiple of
+			 * small page size. */
+			__u64 size;
+
+			/* NVGPU_GPU_ALLOC_VIDMEM_FLAG_* */
+			__u32 flags;
+
+			/* Informational mem tag for resource usage
+			 * tracking. */
+			__u16 memtag;
+
+			__u16 reserved0;
+
+			/* GPU-visible physical memory alignment in
+			 * bytes.
+			 *
+			 * Alignment must be a power of two. Minimum
+			 * alignment is the small page size, which 0
+			 * also denotes.
+			 *
+			 * For contiguous and non-contiguous
+			 * allocations, the start address of the
+			 * physical memory allocation will be aligned
+			 * by this value.
+			 *
+			 * For non-contiguous allocations, memory is
+			 * internally allocated in round_up(size /
+			 * alignment) contiguous blocks. The start
+			 * address of each block is aligned by the
+			 * alignment value. If the size is not a
+			 * multiple of alignment (which is ok), the
+			 * last allocation block size is (size %
+			 * alignment).
+			 *
+			 * By specifying the big page size here and
+			 * allocation size that is a multiple of big
+			 * pages, it will be guaranteed that the
+			 * allocated buffer is big page size mappable.
+			 */
+			__u32 alignment;
+
+			__u32 reserved1[3];
+		} in;
+
+		struct {
+			__s32 dmabuf_fd;
+		} out;
+	};
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -489,8 +557,11 @@ struct nvgpu_gpu_get_engine_info_args {
 #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \
 			struct nvgpu_gpu_get_engine_info_args)
+#define NVGPU_GPU_IOCTL_ALLOC_VIDMEM \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
+			struct nvgpu_gpu_alloc_vidmem_args)
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_GET_ENGINE_INFO)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_VIDMEM)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
 
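Worked example of the alignment semantics documented above (illustrative numbers): a non-contiguous allocation with size = 10 MiB and alignment = 4 MiB is carved into round_up(10 MiB / 4 MiB) = 3 contiguous blocks; the first two blocks are 4 MiB each and, since the size is not a multiple of the alignment, the last block is 10 MiB % 4 MiB = 2 MiB, with every block start aligned to 4 MiB. Passing the big page size as alignment together with a size that is a multiple of big pages therefore guarantees a big-page-mappable buffer.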