diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 48 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 148 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 73 |
5 files changed, 271 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 3e34b6b8..6b832670 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -704,6 +704,49 @@ clean_up: | |||
704 | return err; | 704 | return err; |
705 | } | 705 | } |
706 | 706 | ||
707 | static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, | ||
708 | struct nvgpu_gpu_alloc_vidmem_args *args) | ||
709 | { | ||
710 | u32 align = args->in.alignment ? args->in.alignment : SZ_4K; | ||
711 | int fd; | ||
712 | |||
713 | gk20a_dbg_fn(""); | ||
714 | |||
715 | /* not yet supported */ | ||
716 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) | ||
717 | return -EINVAL; | ||
718 | |||
719 | /* not yet supported */ | ||
720 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) | ||
721 | return -EINVAL; | ||
722 | |||
723 | if (args->in.size & (SZ_4K - 1)) | ||
724 | return -EINVAL; | ||
725 | |||
726 | if (!args->in.size) | ||
727 | return -EINVAL; | ||
728 | |||
729 | if (align & (align - 1)) | ||
730 | return -EINVAL; | ||
731 | |||
732 | if (align > roundup_pow_of_two(args->in.size)) { | ||
733 | /* log this special case, buddy allocator detail */ | ||
734 | gk20a_warn(dev_from_gk20a(g), | ||
735 | "alignment larger than buffer size rounded up to power of 2 is not supported"); | ||
736 | return -EINVAL; | ||
737 | } | ||
738 | |||
739 | fd = gk20a_vidmem_buf_alloc(g, args->in.size); | ||
740 | if (fd < 0) | ||
741 | return fd; | ||
742 | |||
743 | args->out.dmabuf_fd = fd; | ||
744 | |||
745 | gk20a_dbg_fn("done, fd=%d", fd); | ||
746 | |||
747 | return 0; | ||
748 | } | ||
749 | |||
707 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 750 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
708 | { | 751 | { |
709 | struct device *dev = filp->private_data; | 752 | struct device *dev = filp->private_data; |
@@ -951,6 +994,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
951 | (struct nvgpu_gpu_get_engine_info_args *)buf); | 994 | (struct nvgpu_gpu_get_engine_info_args *)buf); |
952 | break; | 995 | break; |
953 | 996 | ||
997 | case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: | ||
998 | err = nvgpu_gpu_alloc_vidmem(g, | ||
999 | (struct nvgpu_gpu_alloc_vidmem_args *)buf); | ||
1000 | break; | ||
1001 | |||
954 | default: | 1002 | default: |
955 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); | 1003 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); |
956 | err = -ENOTTY; | 1004 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 04f82033..bb8cb33f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -2148,6 +2148,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
2148 | gpu->default_compute_preempt_mode = | 2148 | gpu->default_compute_preempt_mode = |
2149 | g->gr.preemption_mode_rec.default_compute_preempt_mode; | 2149 | g->gr.preemption_mode_rec.default_compute_preempt_mode; |
2150 | 2150 | ||
2151 | gpu->local_video_memory_size = g->mm.vidmem.size; | ||
2151 | 2152 | ||
2152 | return 0; | 2153 | return 0; |
2153 | } | 2154 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index bf3d990c..2dcc4363 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -411,6 +411,14 @@ struct gk20a_dmabuf_priv { | |||
411 | u64 buffer_id; | 411 | u64 buffer_id; |
412 | }; | 412 | }; |
413 | 413 | ||
414 | struct gk20a_vidmem_buf { | ||
415 | struct gk20a *g; | ||
416 | struct mem_desc mem; | ||
417 | struct dma_buf *dmabuf; | ||
418 | void *dmabuf_priv; | ||
419 | void (*dmabuf_priv_delete)(void *); | ||
420 | }; | ||
421 | |||
414 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); | 422 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); |
415 | 423 | ||
416 | static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, | 424 | static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, |
@@ -1833,6 +1841,146 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, | |||
1833 | return mapped_buffer->addr; | 1841 | return mapped_buffer->addr; |
1834 | } | 1842 | } |
1835 | 1843 | ||
1844 | #if defined(CONFIG_GK20A_VIDMEM) | ||
1845 | static struct sg_table *gk20a_vidbuf_map_dma_buf( | ||
1846 | struct dma_buf_attachment *attach, enum dma_data_direction dir) | ||
1847 | { | ||
1848 | struct gk20a_vidmem_buf *buf = attach->dmabuf->priv; | ||
1849 | |||
1850 | return buf->mem.sgt; | ||
1851 | } | ||
1852 | |||
1853 | static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, | ||
1854 | struct sg_table *sgt, | ||
1855 | enum dma_data_direction dir) | ||
1856 | { | ||
1857 | } | ||
1858 | |||
1859 | static void gk20a_vidbuf_release(struct dma_buf *dmabuf) | ||
1860 | { | ||
1861 | struct gk20a_vidmem_buf *buf = dmabuf->priv; | ||
1862 | |||
1863 | gk20a_dbg_fn(""); | ||
1864 | |||
1865 | if (buf->dmabuf_priv) | ||
1866 | buf->dmabuf_priv_delete(buf->dmabuf_priv); | ||
1867 | |||
1868 | gk20a_gmmu_free(buf->g, &buf->mem); | ||
1869 | kfree(buf); | ||
1870 | } | ||
1871 | |||
1872 | static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) | ||
1873 | { | ||
1874 | WARN_ON("Not supported"); | ||
1875 | return NULL; | ||
1876 | } | ||
1877 | |||
1878 | static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, | ||
1879 | unsigned long page_num) | ||
1880 | { | ||
1881 | WARN_ON("Not supported"); | ||
1882 | return NULL; | ||
1883 | } | ||
1884 | |||
/*
 * dma_buf_ops.mmap hook: userspace mapping is not provided for these
 * buffers, so every request fails with -EINVAL.
 */
static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
	/* unconditionally refused */
	return -EINVAL;
}
1889 | |||
1890 | static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, | ||
1891 | struct device *dev, void *priv, void (*delete)(void *priv)) | ||
1892 | { | ||
1893 | struct gk20a_vidmem_buf *buf = dmabuf->priv; | ||
1894 | |||
1895 | buf->dmabuf_priv = priv; | ||
1896 | buf->dmabuf_priv_delete = delete; | ||
1897 | |||
1898 | return 0; | ||
1899 | } | ||
1900 | |||
1901 | static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, | ||
1902 | struct device *dev) | ||
1903 | { | ||
1904 | struct gk20a_vidmem_buf *buf = dmabuf->priv; | ||
1905 | |||
1906 | return buf->dmabuf_priv; | ||
1907 | } | ||
1908 | |||
1909 | static const struct dma_buf_ops gk20a_vidbuf_ops = { | ||
1910 | .map_dma_buf = gk20a_vidbuf_map_dma_buf, | ||
1911 | .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, | ||
1912 | .release = gk20a_vidbuf_release, | ||
1913 | .kmap_atomic = gk20a_vidbuf_kmap_atomic, | ||
1914 | .kmap = gk20a_vidbuf_kmap, | ||
1915 | .mmap = gk20a_vidbuf_mmap, | ||
1916 | .set_drvdata = gk20a_vidbuf_set_private, | ||
1917 | .get_drvdata = gk20a_vidbuf_get_private, | ||
1918 | }; | ||
1919 | |||
1920 | static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf) | ||
1921 | { | ||
1922 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) | ||
1923 | DEFINE_DMA_BUF_EXPORT_INFO(exp_info); | ||
1924 | |||
1925 | exp_info.priv = buf; | ||
1926 | exp_info.ops = &gk20a_vidbuf_ops; | ||
1927 | exp_info.size = buf->mem.size; | ||
1928 | exp_info.flags = O_RDWR; | ||
1929 | |||
1930 | return dma_buf_export(&exp_info); | ||
1931 | #else | ||
1932 | return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size, | ||
1933 | O_RDWR, NULL); | ||
1934 | #endif | ||
1935 | } | ||
1936 | #endif | ||
1937 | |||
1938 | int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) | ||
1939 | { | ||
1940 | #if defined(CONFIG_GK20A_VIDMEM) | ||
1941 | struct gk20a_vidmem_buf *buf; | ||
1942 | int err, fd; | ||
1943 | |||
1944 | gk20a_dbg_fn(""); | ||
1945 | |||
1946 | buf = kzalloc(sizeof(*buf), GFP_KERNEL); | ||
1947 | if (!buf) | ||
1948 | return -ENOMEM; | ||
1949 | |||
1950 | buf->g = g; | ||
1951 | |||
1952 | err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem); | ||
1953 | if (err) | ||
1954 | goto err_kfree; | ||
1955 | |||
1956 | buf->dmabuf = gk20a_vidbuf_export(buf); | ||
1957 | if (IS_ERR(buf->dmabuf)) { | ||
1958 | err = PTR_ERR(buf->dmabuf); | ||
1959 | goto err_bfree; | ||
1960 | } | ||
1961 | |||
1962 | fd = get_unused_fd_flags(O_RDWR); | ||
1963 | if (fd < 0) { | ||
1964 | /* ->release frees what we have done */ | ||
1965 | dma_buf_put(buf->dmabuf); | ||
1966 | return fd; | ||
1967 | } | ||
1968 | |||
1969 | /* fclose() on this drops one ref, freeing the dma buf */ | ||
1970 | fd_install(fd, buf->dmabuf->file); | ||
1971 | |||
1972 | return fd; | ||
1973 | |||
1974 | err_bfree: | ||
1975 | gk20a_gmmu_free(g, &buf->mem); | ||
1976 | err_kfree: | ||
1977 | kfree(buf); | ||
1978 | return err; | ||
1979 | #else | ||
1980 | return -ENOSYS; | ||
1981 | #endif | ||
1982 | } | ||
1983 | |||
1836 | u64 gk20a_vm_map(struct vm_gk20a *vm, | 1984 | u64 gk20a_vm_map(struct vm_gk20a *vm, |
1837 | struct dma_buf *dmabuf, | 1985 | struct dma_buf *dmabuf, |
1838 | u64 offset_align, | 1986 | u64 offset_align, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d7503948..5f0ce657 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -718,6 +718,8 @@ void gk20a_vm_mapping_batch_finish_locked( | |||
718 | struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); | 718 | struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); |
719 | 719 | ||
720 | 720 | ||
721 | int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes); | ||
722 | |||
721 | /* Note: batch may be NULL if map op is not part of a batch */ | 723 | /* Note: batch may be NULL if map op is not part of a batch */ |
722 | int gk20a_vm_map_buffer(struct vm_gk20a *vm, | 724 | int gk20a_vm_map_buffer(struct vm_gk20a *vm, |
723 | int dmabuf_fd, | 725 | int dmabuf_fd, |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index aa950dfa..66ea05b3 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -196,6 +196,8 @@ struct nvgpu_gpu_characteristics { | |||
196 | __u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */ | 196 | __u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */ |
197 | __u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */ | 197 | __u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */ |
198 | 198 | ||
199 | __u64 local_video_memory_size; /* in bytes, non-zero only for dGPUs */ | ||
200 | |||
199 | /* Notes: | 201 | /* Notes: |
200 | - This struct can be safely appended with new fields. However, always | 202 | - This struct can be safely appended with new fields. However, always |
201 | keep the structure size multiple of 8 and make sure that the binary | 203 | keep the structure size multiple of 8 and make sure that the binary |
@@ -434,6 +436,72 @@ struct nvgpu_gpu_get_engine_info_args { | |||
434 | __u64 engine_info_buf_addr; | 436 | __u64 engine_info_buf_addr; |
435 | }; | 437 | }; |
436 | 438 | ||
/* Flags for nvgpu_gpu_alloc_vidmem_args.in.flags */

/* Bit 0: contiguous allocation */
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CONTIGUOUS		(1U << 0)

/*
 * Bits 1-3: CPU access and coherency mode. Use CPU access with care,
 * BAR resources are scarce.
 */
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_NOT_MAPPABLE	(0U << 1)
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_WRITE_COMBINE	(1U << 1)
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_CACHED		(2U << 1)
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK		(7U << 1)

/* Bit 4: VPR allocation */
#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR			(1U << 4)
449 | |||
450 | /* Allocation of device-specific local video memory. Returns dmabuf fd | ||
451 | * on success. */ | ||
452 | struct nvgpu_gpu_alloc_vidmem_args { | ||
453 | union { | ||
454 | struct { | ||
455 | /* Size for allocation. Must be a multiple of | ||
456 | * small page size. */ | ||
457 | __u64 size; | ||
458 | |||
459 | /* NVGPU_GPU_ALLOC_VIDMEM_FLAG_* */ | ||
460 | __u32 flags; | ||
461 | |||
462 | /* Informational mem tag for resource usage | ||
463 | * tracking. */ | ||
464 | __u16 memtag; | ||
465 | |||
466 | __u16 reserved0; | ||
467 | |||
468 | /* GPU-visible physical memory alignment in | ||
469 | * bytes. | ||
470 | * | ||
471 | * Alignment must be a power of two. Minimum | ||
472 | * alignment is the small page size, which 0 | ||
473 | * also denotes. | ||
474 | * | ||
475 | * For contiguous and non-contiguous | ||
476 | * allocations, the start address of the | ||
477 | * physical memory allocation will be aligned | ||
478 | * by this value. | ||
479 | * | ||
480 | * For non-contiguous allocations, memory is | ||
481 | * internally allocated in round_up(size / | ||
482 | * alignment) contiguous blocks. The start | ||
483 | * address of each block is aligned by the | ||
484 | * alignment value. If the size is not a | ||
485 | * multiple of alignment (which is ok), the | ||
486 | * last allocation block size is (size % | ||
487 | * alignment). | ||
488 | * | ||
489 | * By specifying the big page size here and | ||
490 | * allocation size that is a multiple of big | ||
491 | * pages, it will be guaranteed that the | ||
492 | * allocated buffer is big page size mappable. | ||
493 | */ | ||
494 | __u32 alignment; | ||
495 | |||
496 | __u32 reserved1[3]; | ||
497 | } in; | ||
498 | |||
499 | struct { | ||
500 | __s32 dmabuf_fd; | ||
501 | } out; | ||
502 | }; | ||
503 | }; | ||
504 | |||
437 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ | 505 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ |
438 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) | 506 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) |
439 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ | 507 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ |
@@ -489,8 +557,11 @@ struct nvgpu_gpu_get_engine_info_args { | |||
489 | #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO \ | 557 | #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO \ |
490 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \ | 558 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \ |
491 | struct nvgpu_gpu_get_engine_info_args) | 559 | struct nvgpu_gpu_get_engine_info_args) |
560 | #define NVGPU_GPU_IOCTL_ALLOC_VIDMEM \ | ||
561 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \ | ||
562 | struct nvgpu_gpu_alloc_vidmem_args) | ||
492 | #define NVGPU_GPU_IOCTL_LAST \ | 563 | #define NVGPU_GPU_IOCTL_LAST \ |
493 | _IOC_NR(NVGPU_GPU_IOCTL_GET_ENGINE_INFO) | 564 | _IOC_NR(NVGPU_GPU_IOCTL_ALLOC_VIDMEM) |
494 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ | 565 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ |
495 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) | 566 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) |
496 | 567 | ||