diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 111 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
3 files changed, 98 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index bc3f67c4..2988fdec 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -514,6 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | |||
514 | return err; | 514 | return err; |
515 | } | 515 | } |
516 | 516 | ||
517 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); | ||
518 | |||
517 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | 519 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) |
518 | { | 520 | { |
519 | struct dbg_session_gk20a *dbg_s = filp->private_data; | 521 | struct dbg_session_gk20a *dbg_s = filp->private_data; |
@@ -534,6 +536,10 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | |||
534 | NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE); | 536 | NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE); |
535 | nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); | 537 | nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); |
536 | 538 | ||
539 | /* If this session owned the perf buffer, release it */ | ||
540 | if (g->perfbuf.owner == dbg_s) | ||
541 | gk20a_perfbuf_release_locked(g, g->perfbuf.offset); | ||
542 | |||
537 | /* Per-context profiler objects were released when we called | 543 | /* Per-context profiler objects were released when we called |
538 | * dbg_unbind_all_channels. We could still have global ones. | 544 | * dbg_unbind_all_channels. We could still have global ones. |
539 | */ | 545 | */ |
@@ -1821,16 +1827,39 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1821 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | 1827 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) |
1822 | { | 1828 | { |
1823 | struct gk20a *g = dbg_s->g; | 1829 | struct gk20a *g = dbg_s->g; |
1830 | struct mm_gk20a *mm = &g->mm; | ||
1831 | struct vm_gk20a *vm = &mm->perfbuf.vm; | ||
1824 | int err; | 1832 | int err; |
1825 | u32 virt_size; | 1833 | u32 virt_size; |
1826 | u32 virt_addr_lo; | 1834 | u32 virt_addr_lo; |
1827 | u32 virt_addr_hi; | 1835 | u32 virt_addr_hi; |
1828 | u32 inst_pa_page; | 1836 | u32 inst_pa_page; |
1837 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | ||
1838 | |||
1839 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1840 | |||
1841 | if (g->perfbuf.owner) { | ||
1842 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1843 | return -EBUSY; | ||
1844 | } | ||
1845 | |||
1846 | err = gk20a_init_vm(mm, vm, big_page_size, | ||
1847 | big_page_size << 10, | ||
1848 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
1849 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
1850 | false, false, "perfbuf"); | ||
1851 | if (err) { | ||
1852 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1853 | return err; | ||
1854 | } | ||
1855 | |||
1856 | err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); | ||
1857 | if (err) | ||
1858 | goto err_remove_vm; | ||
1829 | 1859 | ||
1830 | if (!g->allow_all) | 1860 | g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0); |
1831 | return -EACCES; | ||
1832 | 1861 | ||
1833 | err = gk20a_vm_map_buffer(&g->mm.pmu.vm, | 1862 | err = gk20a_vm_map_buffer(vm, |
1834 | args->dmabuf_fd, | 1863 | args->dmabuf_fd, |
1835 | &args->offset, | 1864 | &args->offset, |
1836 | 0, | 1865 | 0, |
@@ -1839,23 +1868,21 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1839 | args->mapping_size, | 1868 | args->mapping_size, |
1840 | NULL); | 1869 | NULL); |
1841 | if (err) | 1870 | if (err) |
1842 | return err; | 1871 | goto err_remove_vm; |
1843 | 1872 | ||
1844 | /* perf output buffer may not cross a 4GB boundary - with a separate va | 1873 | /* perf output buffer may not cross a 4GB boundary */ |
1845 | * smaller than that, it won't */ | ||
1846 | virt_size = u64_lo32(args->mapping_size); | 1874 | virt_size = u64_lo32(args->mapping_size); |
1847 | virt_addr_lo = u64_lo32(args->offset); | 1875 | virt_addr_lo = u64_lo32(args->offset); |
1848 | virt_addr_hi = u64_hi32(args->offset); | 1876 | virt_addr_hi = u64_hi32(args->offset); |
1849 | /* but check anyway */ | 1877 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { |
1850 | if (args->offset + virt_size > SZ_4G) { | ||
1851 | err = -EINVAL; | 1878 | err = -EINVAL; |
1852 | goto fail_unmap; | 1879 | goto err_unmap; |
1853 | } | 1880 | } |
1854 | 1881 | ||
1855 | err = gk20a_busy(g); | 1882 | err = gk20a_busy(g); |
1856 | if (err) { | 1883 | if (err) { |
1857 | nvgpu_err(g, "failed to poweron"); | 1884 | nvgpu_err(g, "failed to poweron"); |
1858 | goto fail_unmap; | 1885 | goto err_unmap; |
1859 | } | 1886 | } |
1860 | 1887 | ||
1861 | /* address and size are aligned to 32 bytes, the lowest bits read back | 1888 | /* address and size are aligned to 32 bytes, the lowest bits read back |
@@ -1866,7 +1893,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1866 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); | 1893 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); |
1867 | 1894 | ||
1868 | /* this field is aligned to 4K */ | 1895 | /* this field is aligned to 4K */ |
1869 | inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; | 1896 | inst_pa_page = gk20a_mm_inst_block_addr(g, |
1897 | &mm->perfbuf.inst_block) >> 12; | ||
1870 | 1898 | ||
1871 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | 1899 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK |
1872 | * should be written last */ | 1900 | * should be written last */ |
@@ -1877,23 +1905,24 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1877 | 1905 | ||
1878 | gk20a_idle(g); | 1906 | gk20a_idle(g); |
1879 | 1907 | ||
1908 | g->perfbuf.owner = dbg_s; | ||
1909 | g->perfbuf.offset = args->offset; | ||
1910 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1911 | |||
1880 | return 0; | 1912 | return 0; |
1881 | 1913 | ||
1882 | fail_unmap: | 1914 | err_unmap: |
1883 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); | 1915 | gk20a_vm_unmap_buffer(vm, args->offset, NULL); |
1916 | err_remove_vm: | ||
1917 | gk20a_remove_vm(vm, &mm->perfbuf.inst_block); | ||
1918 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1884 | return err; | 1919 | return err; |
1885 | } | 1920 | } |
1886 | 1921 | ||
1887 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | 1922 | /* must be called with dbg_sessions_lock held */ |
1888 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | 1923 | static int gk20a_perfbuf_disable_locked(struct gk20a *g) |
1889 | { | 1924 | { |
1890 | struct gk20a *g = dbg_s->g; | 1925 | int err = gk20a_busy(g); |
1891 | int err; | ||
1892 | |||
1893 | if (!g->allow_all) | ||
1894 | return -EACCES; | ||
1895 | |||
1896 | err = gk20a_busy(g); | ||
1897 | if (err) { | 1926 | if (err) { |
1898 | nvgpu_err(g, "failed to poweron"); | 1927 | nvgpu_err(g, "failed to poweron"); |
1899 | return err; | 1928 | return err; |
@@ -1911,11 +1940,45 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | |||
1911 | 1940 | ||
1912 | gk20a_idle(g); | 1941 | gk20a_idle(g); |
1913 | 1942 | ||
1914 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); | ||
1915 | |||
1916 | return 0; | 1943 | return 0; |
1917 | } | 1944 | } |
1918 | 1945 | ||
1946 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | ||
1947 | { | ||
1948 | struct mm_gk20a *mm = &g->mm; | ||
1949 | struct vm_gk20a *vm = &mm->perfbuf.vm; | ||
1950 | int err; | ||
1951 | |||
1952 | err = gk20a_perfbuf_disable_locked(g); | ||
1953 | |||
1954 | gk20a_vm_unmap_buffer(vm, offset, NULL); | ||
1955 | gk20a_remove_vm(vm, &mm->perfbuf.inst_block); | ||
1956 | |||
1957 | g->perfbuf.owner = NULL; | ||
1958 | g->perfbuf.offset = 0; | ||
1959 | return err; | ||
1960 | } | ||
1961 | |||
1962 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
1963 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
1964 | { | ||
1965 | struct gk20a *g = dbg_s->g; | ||
1966 | int err; | ||
1967 | |||
1968 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1969 | if ((g->perfbuf.owner != dbg_s) || | ||
1970 | (g->perfbuf.offset != args->offset)) { | ||
1971 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1972 | return -EINVAL; | ||
1973 | } | ||
1974 | |||
1975 | err = gk20a_perfbuf_release_locked(g, args->offset); | ||
1976 | |||
1977 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1978 | |||
1979 | return err; | ||
1980 | } | ||
1981 | |||
1919 | void gk20a_init_dbg_session_ops(struct gpu_ops *gops) | 1982 | void gk20a_init_dbg_session_ops(struct gpu_ops *gops) |
1920 | { | 1983 | { |
1921 | gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a; | 1984 | gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 76251000..2f25e406 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1027,6 +1027,12 @@ struct gk20a { | |||
1027 | struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf; | 1027 | struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf; |
1028 | u32 dbg_regops_tmp_buf_ops; | 1028 | u32 dbg_regops_tmp_buf_ops; |
1029 | 1029 | ||
1030 | /* For perfbuf mapping */ | ||
1031 | struct { | ||
1032 | struct dbg_session_gk20a *owner; | ||
1033 | u64 offset; | ||
1034 | } perfbuf; | ||
1035 | |||
1030 | /* For profiler reservations */ | 1036 | /* For profiler reservations */ |
1031 | struct nvgpu_list_node profiler_objects; | 1037 | struct nvgpu_list_node profiler_objects; |
1032 | bool global_profiler_reservation_held; | 1038 | bool global_profiler_reservation_held; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index dfdaa164..7fac811e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -363,6 +363,11 @@ struct mm_gk20a { | |||
363 | 363 | ||
364 | struct { | 364 | struct { |
365 | struct vm_gk20a vm; | 365 | struct vm_gk20a vm; |
366 | struct nvgpu_mem inst_block; | ||
367 | } perfbuf; | ||
368 | |||
369 | struct { | ||
370 | struct vm_gk20a vm; | ||
366 | } cde; | 371 | } cde; |
367 | 372 | ||
368 | struct { | 373 | struct { |