author    Konsta Holtta <kholtta@nvidia.com>    2016-07-15 08:52:52 -0400
committer Vijayakumar Subbu <vsubbu@nvidia.com> 2016-07-21 08:55:26 -0400
commit    13231006671a1da11cfaf7a67e69430199820788 (patch)
tree      6b4053838d672b158fe636b60768240585a21eb0
parent    83071083d779b67ad73172675a6dfa34ed19b414 (diff)
gpu: nvgpu: add vidmem allocation ioctl
Add NVGPU_GPU_IOCTL_ALLOC_VIDMEM to the ctrl fd for letting userspace
allocate on-board GPU memory (aka vidmem). The allocations are returned
as dmabuf fds. Also, report the amount of local video memory in the gpu
characteristics.

Jira DNVGPU-19
Jira DNVGPU-38

Change-Id: I28e361d31bb630b96d06bb1c86d022d91c7592bc
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1181152
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c   48
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c         1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c    148
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h      2
-rw-r--r--  include/uapi/linux/nvgpu.h             73
5 files changed, 271 insertions(+), 1 deletion(-)
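For context, a minimal userspace sketch of the new ioctl. This is hedged: the device path, helper name, and sizes below are illustrative assumptions, not part of this change.

  /* Illustrative usage of NVGPU_GPU_IOCTL_ALLOC_VIDMEM; the ctrl device
   * path below is an assumption, and error handling is abbreviated. */
  #include <fcntl.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/nvgpu.h>

  static int alloc_vidmem(int ctrl_fd, __u64 size)
  {
          struct nvgpu_gpu_alloc_vidmem_args args;

          memset(&args, 0, sizeof(args));
          args.in.size = size;      /* must be a multiple of small page size (4K) */
          args.in.alignment = 0;    /* 0 selects the minimum (small page) alignment */

          if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args) < 0)
                  return -1;
          return args.out.dmabuf_fd; /* caller owns the fd; close() drops the ref */
  }

  /* e.g.: int fd = alloc_vidmem(open("/dev/nvhost-ctrl-gpu", O_RDWR), 64 << 20); */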
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 3e34b6b8..6b832670 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -704,6 +704,49 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
+		struct nvgpu_gpu_alloc_vidmem_args *args)
+{
+	u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
+		return -EINVAL;
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
+		return -EINVAL;
+
+	if (args->in.size & (SZ_4K - 1))
+		return -EINVAL;
+
+	if (!args->in.size)
+		return -EINVAL;
+
+	if (align & (align - 1))
+		return -EINVAL;
+
+	if (align > roundup_pow_of_two(args->in.size)) {
+		/* log this special case, buddy allocator detail */
+		gk20a_warn(dev_from_gk20a(g),
+			"alignment larger than buffer size rounded up to power of 2 is not supported");
+		return -EINVAL;
+	}
+
+	fd = gk20a_vidmem_buf_alloc(g, args->in.size);
+	if (fd < 0)
+		return fd;
+
+	args->out.dmabuf_fd = fd;
+
+	gk20a_dbg_fn("done, fd=%d", fd);
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct device *dev = filp->private_data;
@@ -951,6 +994,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			(struct nvgpu_gpu_get_engine_info_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
+		err = nvgpu_gpu_alloc_vidmem(g,
+			(struct nvgpu_gpu_alloc_vidmem_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
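Note on the last validation check above: the buddy allocator cannot honor an alignment larger than the request size rounded up to the next power of two. As a worked example (illustrative numbers), a 3 MiB request rounds up to 4 MiB, so alignment values up to 4 MiB pass while an 8 MiB alignment is rejected with -EINVAL.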
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 04f82033..bb8cb33f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2148,6 +2148,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->default_compute_preempt_mode =
 		g->gr.preemption_mode_rec.default_compute_preempt_mode;
 
+	gpu->local_video_memory_size = g->mm.vidmem.size;
 
 	return 0;
 }
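Userspace can read the new field back through the existing characteristics query. A hedged sketch, assuming the pre-existing NVGPU_GPU_IOCTL_GET_CHARACTERISTICS flow (not part of this change):

  struct nvgpu_gpu_characteristics ch;
  struct nvgpu_gpu_get_characteristics req;

  memset(&ch, 0, sizeof(ch));
  memset(&req, 0, sizeof(req));
  req.gpu_characteristics_buf_size = sizeof(ch);
  req.gpu_characteristics_buf_addr = (__u64)(uintptr_t)&ch;

  if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0)
          printf("local vidmem: %llu bytes\n",
                 (unsigned long long)ch.local_video_memory_size);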
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index bf3d990c..2dcc4363 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -411,6 +411,14 @@ struct gk20a_dmabuf_priv {
 	u64 buffer_id;
 };
 
+struct gk20a_vidmem_buf {
+	struct gk20a *g;
+	struct mem_desc mem;
+	struct dma_buf *dmabuf;
+	void *dmabuf_priv;
+	void (*dmabuf_priv_delete)(void *);
+};
+
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
@@ -1833,6 +1841,146 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	return mapped_buffer->addr;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static struct sg_table *gk20a_vidbuf_map_dma_buf(
+	struct dma_buf_attachment *attach, enum dma_data_direction dir)
+{
+	struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
+
+	return buf->mem.sgt;
+}
+
+static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
+				       struct sg_table *sgt,
+				       enum dma_data_direction dir)
+{
+}
+
+static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	gk20a_dbg_fn("");
+
+	if (buf->dmabuf_priv)
+		buf->dmabuf_priv_delete(buf->dmabuf_priv);
+
+	gk20a_gmmu_free(buf->g, &buf->mem);
+	kfree(buf);
+}
+
+static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
+				      unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
+	struct device *dev, void *priv, void (*delete)(void *priv))
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	buf->dmabuf_priv = priv;
+	buf->dmabuf_priv_delete = delete;
+
+	return 0;
+}
+
+static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
+	struct device *dev)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	return buf->dmabuf_priv;
+}
+
+static const struct dma_buf_ops gk20a_vidbuf_ops = {
+	.map_dma_buf = gk20a_vidbuf_map_dma_buf,
+	.unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
+	.release = gk20a_vidbuf_release,
+	.kmap_atomic = gk20a_vidbuf_kmap_atomic,
+	.kmap = gk20a_vidbuf_kmap,
+	.mmap = gk20a_vidbuf_mmap,
+	.set_drvdata = gk20a_vidbuf_set_private,
+	.get_drvdata = gk20a_vidbuf_get_private,
+};
+
+static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.priv = buf;
+	exp_info.ops = &gk20a_vidbuf_ops;
+	exp_info.size = buf->mem.size;
+	exp_info.flags = O_RDWR;
+
+	return dma_buf_export(&exp_info);
+#else
+	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+			      O_RDWR, NULL);
+#endif
+}
+#endif
+
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	struct gk20a_vidmem_buf *buf;
+	int err, fd;
+
+	gk20a_dbg_fn("");
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->g = g;
+
+	err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
+	if (err)
+		goto err_kfree;
+
+	buf->dmabuf = gk20a_vidbuf_export(buf);
+	if (IS_ERR(buf->dmabuf)) {
+		err = PTR_ERR(buf->dmabuf);
+		goto err_bfree;
+	}
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0) {
+		/* ->release frees what we have done */
+		dma_buf_put(buf->dmabuf);
+		return fd;
+	}
+
+	/* fclose() on this drops one ref, freeing the dma buf */
+	fd_install(fd, buf->dmabuf->file);
+
+	return fd;
+
+err_bfree:
+	gk20a_gmmu_free(g, &buf->mem);
+err_kfree:
+	kfree(buf);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 u64 gk20a_vm_map(struct vm_gk20a *vm,
 		 struct dma_buf *dmabuf,
 		 u64 offset_align,
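Note the ownership handoff in gk20a_vidmem_buf_alloc() above: until dma_buf_export() succeeds, failures are unwound manually through the err_bfree/err_kfree labels; once the export succeeds, the dma_buf owns the allocation, so the fd-allocation error path only calls dma_buf_put(), and the actual cleanup (freeing the vidmem and the wrapper struct) happens in gk20a_vidbuf_release() when the last reference, including the installed fd, is dropped.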
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d7503948..5f0ce657 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -718,6 +718,8 @@ void gk20a_vm_mapping_batch_finish_locked(
 	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
 
 
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
+
 /* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 		int dmabuf_fd,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index aa950dfa..66ea05b3 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -196,6 +196,8 @@ struct nvgpu_gpu_characteristics {
 	__u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */
 	__u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */
 
+	__u64 local_video_memory_size; /* in bytes, non-zero only for dGPUs */
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -434,6 +436,72 @@ struct nvgpu_gpu_get_engine_info_args {
 	__u64 engine_info_buf_addr;
 };
 
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CONTIGUOUS		(1U << 0)
+
+/* CPU access and coherency flags (3 bits). Use CPU access with care,
+ * BAR resources are scarce. */
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_NOT_MAPPABLE	(0U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_WRITE_COMBINE	(1U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_CACHED		(2U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK		(7U << 1)
+
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR			(1U << 4)
+
+/* Allocation of device-specific local video memory. Returns dmabuf fd
+ * on success. */
+struct nvgpu_gpu_alloc_vidmem_args {
+	union {
+		struct {
+			/* Size for allocation. Must be a multiple of
+			 * small page size. */
+			__u64 size;
+
+			/* NVGPU_GPU_ALLOC_VIDMEM_FLAG_* */
+			__u32 flags;
+
+			/* Informational mem tag for resource usage
+			 * tracking. */
+			__u16 memtag;
+
+			__u16 reserved0;
+
+			/* GPU-visible physical memory alignment in
+			 * bytes.
+			 *
+			 * Alignment must be a power of two. Minimum
+			 * alignment is the small page size, which 0
+			 * also denotes.
+			 *
+			 * For contiguous and non-contiguous
+			 * allocations, the start address of the
+			 * physical memory allocation will be aligned
+			 * by this value.
+			 *
+			 * For non-contiguous allocations, memory is
+			 * internally allocated in round_up(size /
+			 * alignment) contiguous blocks. The start
+			 * address of each block is aligned by the
+			 * alignment value. If the size is not a
+			 * multiple of alignment (which is ok), the
+			 * last allocation block size is (size %
+			 * alignment).
+			 *
+			 * By specifying the big page size here and
+			 * allocation size that is a multiple of big
+			 * pages, it will be guaranteed that the
+			 * allocated buffer is big page size mappable.
+			 */
+			__u32 alignment;
+
+			__u32 reserved1[3];
+		} in;
+
+		struct {
+			__s32 dmabuf_fd;
+		} out;
+	};
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -489,8 +557,11 @@ struct nvgpu_gpu_get_engine_info_args {
 #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \
 			struct nvgpu_gpu_get_engine_info_args)
+#define NVGPU_GPU_IOCTL_ALLOC_VIDMEM \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
+			struct nvgpu_gpu_alloc_vidmem_args)
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_GET_ENGINE_INFO)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_VIDMEM)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
 
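Worked example of the alignment semantics documented above (illustrative numbers): a non-contiguous allocation with size = 10 MiB and alignment = 4 MiB is carved into round_up(10 MiB / 4 MiB) = 3 contiguous blocks; the first two blocks are 4 MiB each and, since the size is not a multiple of the alignment, the last block is 10 MiB % 4 MiB = 2 MiB, with every block start aligned to 4 MiB. Passing the big page size as alignment together with a size that is a multiple of big pages therefore guarantees a big-page-mappable buffer.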