From 13231006671a1da11cfaf7a67e69430199820788 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Fri, 15 Jul 2016 15:52:52 +0300
Subject: gpu: nvgpu: add vidmem allocation ioctl

Add NVGPU_GPU_IOCTL_ALLOC_VIDMEM to the ctrl fd for letting userspace
allocate on-board GPU memory (aka vidmem). The allocations are returned
as dmabuf fds. Also, report the amount of local video memory in the gpu
characteristics.

Jira DNVGPU-19
Jira DNVGPU-38
Change-Id: I28e361d31bb630b96d06bb1c86d022d91c7592bc
Signed-off-by: Konsta Holtta
Reviewed-on: http://git-master/r/1181152
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c |  48 ++++++++++++
 drivers/gpu/nvgpu/gk20a/gk20a.c      |   1 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c   | 148 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h   |   2 +
 include/uapi/linux/nvgpu.h           |  73 ++++++++++++++++-
 5 files changed, 271 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 3e34b6b8..6b832670 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -704,6 +704,49 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
+		struct nvgpu_gpu_alloc_vidmem_args *args)
+{
+	u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
+		return -EINVAL;
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
+		return -EINVAL;
+
+	if (args->in.size & (SZ_4K - 1))
+		return -EINVAL;
+
+	if (!args->in.size)
+		return -EINVAL;
+
+	if (align & (align - 1))
+		return -EINVAL;
+
+	if (align > roundup_pow_of_two(args->in.size)) {
+		/* log this special case, buddy allocator detail */
+		gk20a_warn(dev_from_gk20a(g),
+			"alignment larger than buffer size rounded up to power of 2 is not supported");
+		return -EINVAL;
+	}
+
+	fd = gk20a_vidmem_buf_alloc(g, args->in.size);
+	if (fd < 0)
+		return fd;
+
+	args->out.dmabuf_fd = fd;
+
+	gk20a_dbg_fn("done, fd=%d", fd);
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct device *dev = filp->private_data;
@@ -951,6 +994,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_get_engine_info_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
+		err = nvgpu_gpu_alloc_vidmem(g,
+			(struct nvgpu_gpu_alloc_vidmem_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 04f82033..bb8cb33f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2148,6 +2148,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->default_compute_preempt_mode =
 		g->gr.preemption_mode_rec.default_compute_preempt_mode;
 
+	gpu->local_video_memory_size = g->mm.vidmem.size;
 
 	return 0;
 }
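For illustration, a minimal userspace sketch of driving the new ioctl (not part of this patch). The control-node path is an assumption and varies across platforms; the ioctl number and argument struct are the ones added to include/uapi/linux/nvgpu.h below.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Allocate 1 MiB of vidmem; returns a dmabuf fd, or -1 on failure. */
static int alloc_vidmem_1m(void)
{
	struct nvgpu_gpu_alloc_vidmem_args args;
	int ctrl, err;

	/* hypothetical device node; the actual path is platform-specific */
	ctrl = open("/dev/nvhost-ctrl-gpu", O_RDWR);
	if (ctrl < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.in.size = 1024 * 1024;	/* must be a multiple of 4K */
	args.in.alignment = 0;		/* 0 means small page size */
	args.in.flags = 0;		/* no CPU mapping, no VPR (unsupported yet) */

	err = ioctl(ctrl, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args);
	close(ctrl);

	return err ? -1 : args.out.dmabuf_fd;
}

Like any dmabuf fd, the result can be shared across processes or mapped for device use; closing the last reference frees the vidmem through gk20a_vidbuf_release().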
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index bf3d990c..2dcc4363 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -411,6 +411,14 @@ struct gk20a_dmabuf_priv {
 	u64 buffer_id;
 };
 
+struct gk20a_vidmem_buf {
+	struct gk20a *g;
+	struct mem_desc mem;
+	struct dma_buf *dmabuf;
+	void *dmabuf_priv;
+	void (*dmabuf_priv_delete)(void *);
+};
+
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
@@ -1833,6 +1841,146 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	return mapped_buffer->addr;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static struct sg_table *gk20a_vidbuf_map_dma_buf(
+	struct dma_buf_attachment *attach, enum dma_data_direction dir)
+{
+	struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
+
+	return buf->mem.sgt;
+}
+
+static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
+				       struct sg_table *sgt,
+				       enum dma_data_direction dir)
+{
+}
+
+static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	gk20a_dbg_fn("");
+
+	if (buf->dmabuf_priv)
+		buf->dmabuf_priv_delete(buf->dmabuf_priv);
+
+	gk20a_gmmu_free(buf->g, &buf->mem);
+	kfree(buf);
+}
+
+static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
+				      unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
+		struct device *dev, void *priv, void (*delete)(void *priv))
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	buf->dmabuf_priv = priv;
+	buf->dmabuf_priv_delete = delete;
+
+	return 0;
+}
+
+static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
+		struct device *dev)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	return buf->dmabuf_priv;
+}
+
+static const struct dma_buf_ops gk20a_vidbuf_ops = {
+	.map_dma_buf = gk20a_vidbuf_map_dma_buf,
+	.unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
+	.release = gk20a_vidbuf_release,
+	.kmap_atomic = gk20a_vidbuf_kmap_atomic,
+	.kmap = gk20a_vidbuf_kmap,
+	.mmap = gk20a_vidbuf_mmap,
+	.set_drvdata = gk20a_vidbuf_set_private,
+	.get_drvdata = gk20a_vidbuf_get_private,
+};
+
+static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.priv = buf;
+	exp_info.ops = &gk20a_vidbuf_ops;
+	exp_info.size = buf->mem.size;
+	exp_info.flags = O_RDWR;
+
+	return dma_buf_export(&exp_info);
+#else
+	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+			O_RDWR, NULL);
+#endif
+}
+#endif
+
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	struct gk20a_vidmem_buf *buf;
+	int err, fd;
+
+	gk20a_dbg_fn("");
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->g = g;
+
+	err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
+	if (err)
+		goto err_kfree;
+
+	buf->dmabuf = gk20a_vidbuf_export(buf);
+	if (IS_ERR(buf->dmabuf)) {
+		err = PTR_ERR(buf->dmabuf);
+		goto err_bfree;
+	}
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0) {
+		/* ->release frees what we have done */
+		dma_buf_put(buf->dmabuf);
+		return fd;
+	}
+
+	/* fclose() on this drops one ref, freeing the dma buf */
+	fd_install(fd, buf->dmabuf->file);
+
+	return fd;
+
+err_bfree:
+	gk20a_gmmu_free(g, &buf->mem);
+err_kfree:
+	kfree(buf);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 u64 gk20a_vm_map(struct vm_gk20a *vm,
 		struct dma_buf *dmabuf,
 		u64 offset_align,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d7503948..5f0ce657 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -718,6 +718,8 @@ void gk20a_vm_mapping_batch_finish_locked(
 	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
 
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
+
 /* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
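The exported buffer is an ordinary dma-buf, so another in-kernel driver can import it through the standard attach/map path; the map step is what lands in gk20a_vidbuf_map_dma_buf() above. A hypothetical consumer sketch (not part of this patch):

#include <linux/dma-buf.h>
#include <linux/err.h>

/* Import a vidmem dmabuf fd and map it for DMA; the caller must later
 * unmap/detach *out_attach and drop the dma_buf reference. */
static struct sg_table *import_vidmem(struct device *dev, int fd,
				      struct dma_buf_attachment **out_attach)
{
	struct dma_buf *dmabuf;
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	dmabuf = dma_buf_get(fd);		/* takes a file reference */
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach)) {
		dma_buf_put(dmabuf);
		return ERR_CAST(attach);
	}

	/* calls gk20a_vidbuf_map_dma_buf(), i.e. returns buf->mem.sgt */
	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		dma_buf_put(dmabuf);
		return ERR_CAST(sgt);
	}

	*out_attach = attach;
	return sgt;
}

Note that gk20a_vidbuf_map_dma_buf() hands the same sg_table to every attachment rather than duplicating it, so consumers must not free it; it lives until gk20a_vidbuf_release().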
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index aa950dfa..66ea05b3 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -196,6 +196,8 @@ struct nvgpu_gpu_characteristics {
 	__u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */
 	__u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */
 
+	__u64 local_video_memory_size; /* in bytes, non-zero only for dGPUs */
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -434,6 +436,72 @@ struct nvgpu_gpu_get_engine_info_args {
 	__u64 engine_info_buf_addr;
 };
 
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CONTIGUOUS		(1U << 0)
+
+/* CPU access and coherency flags (3 bits). Use CPU access with care,
+ * BAR resources are scarce. */
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_NOT_MAPPABLE	(0U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_WRITE_COMBINE	(1U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_CACHED		(2U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK		(7U << 1)
+
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR			(1U << 4)
+
+/* Allocation of device-specific local video memory. Returns dmabuf fd
+ * on success. */
+struct nvgpu_gpu_alloc_vidmem_args {
+	union {
+		struct {
+			/* Size for allocation. Must be a multiple of
+			 * small page size. */
+			__u64 size;
+
+			/* NVGPU_GPU_ALLOC_VIDMEM_FLAG_* */
+			__u32 flags;
+
+			/* Informational mem tag for resource usage
+			 * tracking. */
+			__u16 memtag;
+
+			__u16 reserved0;
+
+			/* GPU-visible physical memory alignment in
+			 * bytes.
+			 *
+			 * Alignment must be a power of two. Minimum
+			 * alignment is the small page size, which 0
+			 * also denotes.
+			 *
+			 * For contiguous and non-contiguous
+			 * allocations, the start address of the
+			 * physical memory allocation will be aligned
+			 * by this value.
+			 *
+			 * For non-contiguous allocations, memory is
+			 * internally allocated in round_up(size /
+			 * alignment) contiguous blocks. The start
+			 * address of each block is aligned by the
+			 * alignment value. If the size is not a
+			 * multiple of alignment (which is ok), the
+			 * last allocation block size is (size %
+			 * alignment).
+			 *
+			 * By specifying the big page size here and
+			 * allocation size that is a multiple of big
+			 * pages, it will be guaranteed that the
+			 * allocated buffer is big page size mappable.
+			 */
+			__u32 alignment;
+
+			__u32 reserved1[3];
+		} in;
+
+		struct {
+			__s32 dmabuf_fd;
+		} out;
+	};
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -489,8 +557,11 @@ struct nvgpu_gpu_get_engine_info_args {
 #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \
 			struct nvgpu_gpu_get_engine_info_args)
+#define NVGPU_GPU_IOCTL_ALLOC_VIDMEM \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
+			struct nvgpu_gpu_alloc_vidmem_args)
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_GET_ENGINE_INFO)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_VIDMEM)
 
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
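To make the alignment semantics above concrete, a worked example (illustration only, not part of this patch): a non-contiguous allocation of 200 KiB with 64 KiB alignment is carved into round_up(200 KiB / 64 KiB) = 4 blocks, each starting on a 64 KiB boundary, with the last block holding the 200 KiB % 64 KiB = 8 KiB remainder.

#include <stdio.h>

int main(void)
{
	unsigned long size = 200 * 1024;	/* 4K multiple, not a 64K multiple */
	unsigned long align = 64 * 1024;	/* power of two */
	unsigned long blocks = (size + align - 1) / align;	/* round_up */
	unsigned long last = size % align ? size % align : align;

	/* prints "4 blocks, last block 8192 bytes" */
	printf("%lu blocks, last block %lu bytes\n", blocks, last);
	return 0;
}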