path: root/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
author	Peter Daifuku <pdaifuku@nvidia.com>	2017-04-06 19:39:30 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-19 18:44:12 -0400
commit	0d8f5f3fdb87b818b6239efaf91822c01cb3d859 (patch)
tree	29a00ecfe46c35b853da0a9c5846f33f53461f3e /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent	6df49a63ca5f9d77a6d7d6c7dbaaa0fba1b707ca (diff)
gpu: nvgpu: ModeE perfbuffer feature development
perfbuf mapping fixes:
- Allocate a VM specifically for perfbuf use: using the PMU's VM
  results in MMU faults for larger buffers where 64k pages are used.
- Make the 4GB boundary check work for large address spaces
- Remove the requirement to have the allow_all flag set
- Track perfbuf ownership and clean up appropriately

Bug 1880196
JIRA EVLR-1074

Change-Id: Ieee4eb17b64acf9b6ede37bf8e6a91892cda4a7e
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1460809
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
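[Editor's note] The boundary-check bullet is easiest to see in isolation. The old check rejected any mapping whose end address exceeded 4GB, which is wrong once the buffer's VA can legitimately sit above 4GB; the patch instead compares the high 32 bits of the first and one-past-last addresses, so only genuine 4GB-window crossings are refused. The sketch below illustrates that logic; u64_hi32()/u64_lo32() are minimal stand-ins for the kernel helpers, and the addresses in main() are made-up values, not from the patch.

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the kernel's u64_hi32()/u64_lo32() helpers. */
	static inline uint32_t u64_hi32(uint64_t v) { return (uint32_t)(v >> 32); }
	static inline uint32_t u64_lo32(uint64_t v) { return (uint32_t)v; }

	/* Old check: "offset + size > 4GB" rejects every mapping placed
	 * above the first 4GB window, even ones crossing no boundary. */
	static int rejected_old(uint64_t offset, uint32_t size)
	{
		return offset + size > (1ULL << 32); /* SZ_4G */
	}

	/* New check (as in the patch): the buffer stays inside one 4GB
	 * window iff the high 32 bits of its start and end agree. */
	static int rejected_new(uint64_t offset, uint32_t size)
	{
		return u64_hi32(offset) != u64_hi32(offset + size);
	}

	int main(void)
	{
		/* A 64KB buffer at VA 0x1_0001_0000, wholly inside the second
		 * 4GB window: fine for the hardware, yet the old check refused it. */
		uint64_t offset = 0x100010000ULL;
		uint32_t size = 0x10000;

		printf("old: %d  new: %d\n", rejected_old(offset, size),
		       rejected_new(offset, size)); /* prints "old: 1  new: 0" */
		return 0;
	}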
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c	111
1 file changed, 87 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index bc3f67c4..2988fdec 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -514,6 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	return err;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
+
 int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
@@ -534,6 +536,10 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 			NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
 
+	/* If this session owned the perf buffer, release it */
+	if (g->perfbuf.owner == dbg_s)
+		gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
+
 	/* Per-context profiler objects were released when we called
 	 * dbg_unbind_all_channels. We could still have global ones.
 	 */
@@ -1821,16 +1827,39 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
 	struct gk20a *g = dbg_s->g;
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
 	int err;
 	u32 virt_size;
 	u32 virt_addr_lo;
 	u32 virt_addr_hi;
 	u32 inst_pa_page;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	if (g->perfbuf.owner) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EBUSY;
+	}
+
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "perfbuf");
+	if (err) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return err;
+	}
+
+	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	if (err)
+		goto err_remove_vm;
 
-	if (!g->allow_all)
-		return -EACCES;
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
+	err = gk20a_vm_map_buffer(vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1839,23 +1868,21 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->mapping_size,
 			NULL);
 	if (err)
-		return err;
+		goto err_remove_vm;
 
-	/* perf output buffer may not cross a 4GB boundary - with a separate va
-	 * smaller than that, it won't */
+	/* perf output buffer may not cross a 4GB boundary */
 	virt_size = u64_lo32(args->mapping_size);
 	virt_addr_lo = u64_lo32(args->offset);
 	virt_addr_hi = u64_hi32(args->offset);
-	/* but check anyway */
-	if (args->offset + virt_size > SZ_4G) {
+	if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
 		err = -EINVAL;
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
-		goto fail_unmap;
+		goto err_unmap;
 	}
 
 	/* address and size are aligned to 32 bytes, the lowest bits read back
@@ -1866,7 +1893,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = gk20a_mm_inst_block_addr(g,
+			&mm->perfbuf.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
@@ -1877,23 +1905,24 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
+	g->perfbuf.owner = dbg_s;
+	g->perfbuf.offset = args->offset;
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
 	return 0;
 
-fail_unmap:
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
+err_unmap:
+	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+err_remove_vm:
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
 
-static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
-		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+/* must be called with dbg_sessions_lock held */
+static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 {
-	struct gk20a *g = dbg_s->g;
-	int err;
-
-	if (!g->allow_all)
-		return -EACCES;
-
-	err = gk20a_busy(g);
+	int err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
 		return err;
@@ -1911,11 +1940,45 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 
 	gk20a_idle(g);
 
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
-
 	return 0;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->perfbuf.vm;
+	int err;
+
+	err = gk20a_perfbuf_disable_locked(g);
+
+	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+
+	g->perfbuf.owner = NULL;
+	g->perfbuf.offset = 0;
+	return err;
+}
+
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	if ((g->perfbuf.owner != dbg_s) ||
+			(g->perfbuf.offset != args->offset)) {
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		return -EINVAL;
+	}
+
+	err = gk20a_perfbuf_release_locked(g, args->offset);
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	return err;
+}
+
 void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 {
 	gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a;
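
[Editor's note] Taken together, the ownership changes amount to a single-owner protocol over the perf buffer: the first session to map it becomes the owner, a second mapper gets -EBUSY, only the owner (and only with the matching offset) may unmap, and session teardown releases the buffer if that session still owns it. The following is a minimal user-space model of that protocol, with pthread stand-ins for nvgpu_mutex and placeholder types; it is not the nvgpu API.

	#include <pthread.h>
	#include <errno.h>
	#include <stdint.h>
	#include <stddef.h>

	struct session { int id; };          /* stands in for dbg_session_gk20a */

	static pthread_mutex_t sessions_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct session *perfbuf_owner;   /* models g->perfbuf.owner */
	static uint64_t perfbuf_offset;          /* models g->perfbuf.offset */

	/* PERFBUF_MAP: the first session to map claims the buffer. */
	static int perfbuf_map(struct session *s, uint64_t offset)
	{
		pthread_mutex_lock(&sessions_lock);
		if (perfbuf_owner) {
			pthread_mutex_unlock(&sessions_lock);
			return -EBUSY;
		}
		/* ... allocate the VM, map the buffer, program the hardware ... */
		perfbuf_owner = s;
		perfbuf_offset = offset;
		pthread_mutex_unlock(&sessions_lock);
		return 0;
	}

	/* PERFBUF_UNMAP: only the owner, with the matching offset, may
	 * release; session close performs the same release implicitly. */
	static int perfbuf_unmap(struct session *s, uint64_t offset)
	{
		pthread_mutex_lock(&sessions_lock);
		if (perfbuf_owner != s || perfbuf_offset != offset) {
			pthread_mutex_unlock(&sessions_lock);
			return -EINVAL;
		}
		/* ... disable the hardware, unmap the buffer, remove the VM ... */
		perfbuf_owner = NULL;
		perfbuf_offset = 0;
		pthread_mutex_unlock(&sessions_lock);
		return 0;
	}

	int main(void)
	{
		struct session a = { 1 }, b = { 2 };

		perfbuf_map(&a, 0x100010000ULL);    /* 0: a becomes the owner */
		perfbuf_map(&b, 0x200000000ULL);    /* -EBUSY: already owned */
		perfbuf_unmap(&b, 0x100010000ULL);  /* -EINVAL: b is not the owner */
		perfbuf_unmap(&a, 0x100010000ULL);  /* 0: the owner releases */
		return 0;
	}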