diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 88 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/dbg_vgpu.c | 33 | ||||
-rw-r--r-- | include/linux/tegra_vgpu.h | 8 |
4 files changed, 94 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 84edacbb..9b0d9456 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -1834,6 +1834,51 @@ static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | |||
1834 | return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); | 1834 | return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); |
1835 | } | 1835 | } |
1836 | 1836 | ||
1837 | static int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | ||
1838 | { | ||
1839 | struct mm_gk20a *mm = &g->mm; | ||
1840 | u32 virt_addr_lo; | ||
1841 | u32 virt_addr_hi; | ||
1842 | u32 inst_pa_page; | ||
1843 | int err; | ||
1844 | |||
1845 | err = gk20a_busy(g); | ||
1846 | if (err) { | ||
1847 | nvgpu_err(g, "failed to poweron"); | ||
1848 | return err; | ||
1849 | } | ||
1850 | |||
1851 | err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); | ||
1852 | if (err) | ||
1853 | return err; | ||
1854 | |||
1855 | g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); | ||
1856 | |||
1857 | virt_addr_lo = u64_lo32(offset); | ||
1858 | virt_addr_hi = u64_hi32(offset); | ||
1859 | |||
1860 | /* address and size are aligned to 32 bytes, the lowest bits read back | ||
1861 | * as zeros */ | ||
1862 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
1863 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
1864 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
1865 | gk20a_writel(g, perf_pmasys_outsize_r(), size); | ||
1866 | |||
1867 | /* this field is aligned to 4K */ | ||
1868 | inst_pa_page = gk20a_mm_inst_block_addr(g, | ||
1869 | &mm->perfbuf.inst_block) >> 12; | ||
1870 | |||
1871 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | ||
1872 | * should be written last */ | ||
1873 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
1874 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
1875 | perf_pmasys_mem_block_valid_true_f() | | ||
1876 | perf_pmasys_mem_block_target_lfb_f()); | ||
1877 | |||
1878 | gk20a_idle(g); | ||
1879 | return 0; | ||
1880 | } | ||
1881 | |||
1837 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | 1882 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, |
1838 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | 1883 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) |
1839 | { | 1884 | { |
@@ -1841,9 +1886,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1841 | struct mm_gk20a *mm = &g->mm; | 1886 | struct mm_gk20a *mm = &g->mm; |
1842 | int err; | 1887 | int err; |
1843 | u32 virt_size; | 1888 | u32 virt_size; |
1844 | u32 virt_addr_lo; | ||
1845 | u32 virt_addr_hi; | ||
1846 | u32 inst_pa_page; | ||
1847 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | 1889 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; |
1848 | 1890 | ||
1849 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 1891 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
@@ -1863,12 +1905,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1863 | return -ENOMEM; | 1905 | return -ENOMEM; |
1864 | } | 1906 | } |
1865 | 1907 | ||
1866 | err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); | ||
1867 | if (err) | ||
1868 | goto err_remove_vm; | ||
1869 | |||
1870 | g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); | ||
1871 | |||
1872 | err = nvgpu_vm_map_buffer(mm->perfbuf.vm, | 1908 | err = nvgpu_vm_map_buffer(mm->perfbuf.vm, |
1873 | args->dmabuf_fd, | 1909 | args->dmabuf_fd, |
1874 | &args->offset, | 1910 | &args->offset, |
@@ -1882,38 +1918,15 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1882 | 1918 | ||
1883 | /* perf output buffer may not cross a 4GB boundary */ | 1919 | /* perf output buffer may not cross a 4GB boundary */ |
1884 | virt_size = u64_lo32(args->mapping_size); | 1920 | virt_size = u64_lo32(args->mapping_size); |
1885 | virt_addr_lo = u64_lo32(args->offset); | ||
1886 | virt_addr_hi = u64_hi32(args->offset); | ||
1887 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { | 1921 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { |
1888 | err = -EINVAL; | 1922 | err = -EINVAL; |
1889 | goto err_unmap; | 1923 | goto err_unmap; |
1890 | } | 1924 | } |
1891 | 1925 | ||
1892 | err = gk20a_busy(g); | 1926 | err = g->ops.dbg_session_ops.perfbuffer_enable(g, |
1893 | if (err) { | 1927 | args->offset, virt_size); |
1894 | nvgpu_err(g, "failed to poweron"); | 1928 | if (err) |
1895 | goto err_unmap; | 1929 | goto err_unmap; |
1896 | } | ||
1897 | |||
1898 | /* address and size are aligned to 32 bytes, the lowest bits read back | ||
1899 | * as zeros */ | ||
1900 | gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo); | ||
1901 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
1902 | perf_pmasys_outbaseupper_ptr_f(virt_addr_hi)); | ||
1903 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); | ||
1904 | |||
1905 | /* this field is aligned to 4K */ | ||
1906 | inst_pa_page = gk20a_mm_inst_block_addr(g, | ||
1907 | &mm->perfbuf.inst_block) >> 12; | ||
1908 | |||
1909 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | ||
1910 | * should be written last */ | ||
1911 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
1912 | perf_pmasys_mem_block_base_f(inst_pa_page) | | ||
1913 | perf_pmasys_mem_block_valid_true_f() | | ||
1914 | perf_pmasys_mem_block_target_lfb_f()); | ||
1915 | |||
1916 | gk20a_idle(g); | ||
1917 | 1930 | ||
1918 | g->perfbuf.owner = dbg_s; | 1931 | g->perfbuf.owner = dbg_s; |
1919 | g->perfbuf.offset = args->offset; | 1932 | g->perfbuf.offset = args->offset; |
@@ -1924,7 +1937,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1924 | err_unmap: | 1937 | err_unmap: |
1925 | nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL); | 1938 | nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL); |
1926 | err_remove_vm: | 1939 | err_remove_vm: |
1927 | gk20a_free_inst_block(g, &mm->perfbuf.inst_block); | ||
1928 | nvgpu_vm_put(mm->perfbuf.vm); | 1940 | nvgpu_vm_put(mm->perfbuf.vm); |
1929 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 1941 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
1930 | return err; | 1942 | return err; |
@@ -1960,7 +1972,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | |||
1960 | struct vm_gk20a *vm = mm->perfbuf.vm; | 1972 | struct vm_gk20a *vm = mm->perfbuf.vm; |
1961 | int err; | 1973 | int err; |
1962 | 1974 | ||
1963 | err = gk20a_perfbuf_disable_locked(g); | 1975 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); |
1964 | 1976 | ||
1965 | nvgpu_vm_unmap_buffer(vm, offset, NULL); | 1977 | nvgpu_vm_unmap_buffer(vm, offset, NULL); |
1966 | gk20a_free_inst_block(g, &mm->perfbuf.inst_block); | 1978 | gk20a_free_inst_block(g, &mm->perfbuf.inst_block); |
@@ -2001,4 +2013,6 @@ void gk20a_init_dbg_session_ops(struct gpu_ops *gops) | |||
2001 | nvgpu_check_and_set_context_reservation; | 2013 | nvgpu_check_and_set_context_reservation; |
2002 | gops->dbg_session_ops.release_profiler_reservation = | 2014 | gops->dbg_session_ops.release_profiler_reservation = |
2003 | nvgpu_release_profiler_reservation; | 2015 | nvgpu_release_profiler_reservation; |
2016 | gops->dbg_session_ops.perfbuffer_enable = gk20a_perfbuf_enable_locked; | ||
2017 | gops->dbg_session_ops.perfbuffer_disable = gk20a_perfbuf_disable_locked; | ||
2004 | }; | 2018 | }; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index acfb6144..8f291f92 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -869,6 +869,8 @@ struct gpu_ops { | |||
869 | void (*release_profiler_reservation)( | 869 | void (*release_profiler_reservation)( |
870 | struct dbg_session_gk20a *dbg_s, | 870 | struct dbg_session_gk20a *dbg_s, |
871 | struct dbg_profiler_object_data *prof_obj); | 871 | struct dbg_profiler_object_data *prof_obj); |
872 | int (*perfbuffer_enable)(struct gk20a *g, u64 offset, u32 size); | ||
873 | int (*perfbuffer_disable)(struct gk20a *g); | ||
872 | } dbg_session_ops; | 874 | } dbg_session_ops; |
873 | struct { | 875 | struct { |
874 | void (*get_program_numbers)(struct gk20a *g, | 876 | void (*get_program_numbers)(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c b/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c index b9cbcead..72faf76c 100644 --- a/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/dbg_vgpu.c | |||
@@ -178,6 +178,37 @@ static void vgpu_release_profiler_reservation( | |||
178 | vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_RELEASE); | 178 | vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_RELEASE); |
179 | } | 179 | } |
180 | 180 | ||
181 | static int vgpu_sendrecv_perfbuf_cmd(struct gk20a *g, u64 offset, u32 size) | ||
182 | { | ||
183 | struct mm_gk20a *mm = &g->mm; | ||
184 | struct vm_gk20a *vm = mm->perfbuf.vm; | ||
185 | struct tegra_vgpu_cmd_msg msg; | ||
186 | struct tegra_vgpu_perfbuf_mgt_params *p = | ||
187 | &msg.params.perfbuf_management; | ||
188 | int err; | ||
189 | |||
190 | msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT; | ||
191 | msg.handle = vgpu_get_handle(g); | ||
192 | |||
193 | p->vm_handle = vm->handle; | ||
194 | p->offset = offset; | ||
195 | p->size = size; | ||
196 | |||
197 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
198 | err = err ? err : msg.ret; | ||
199 | return err; | ||
200 | } | ||
201 | |||
202 | static int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size) | ||
203 | { | ||
204 | return vgpu_sendrecv_perfbuf_cmd(g, offset, size); | ||
205 | } | ||
206 | |||
/*
 * vgpu implementation of dbg_session_ops.perfbuffer_disable: sends a
 * perfbuf management command with offset and size both zero and returns
 * that command's result.
 *
 * NOTE(review): presumably the receiving side interprets 0/0 as "disable";
 * confirm against the vgpu server implementation.
 */
static int vgpu_perfbuffer_disable(struct gk20a *g)
{
	return vgpu_sendrecv_perfbuf_cmd(g, 0, 0);
}
211 | |||
181 | void vgpu_init_dbg_session_ops(struct gpu_ops *gops) | 212 | void vgpu_init_dbg_session_ops(struct gpu_ops *gops) |
182 | { | 213 | { |
183 | gops->dbg_session_ops.exec_reg_ops = vgpu_exec_regops; | 214 | gops->dbg_session_ops.exec_reg_ops = vgpu_exec_regops; |
@@ -188,4 +219,6 @@ void vgpu_init_dbg_session_ops(struct gpu_ops *gops) | |||
188 | vgpu_check_and_set_context_reservation; | 219 | vgpu_check_and_set_context_reservation; |
189 | gops->dbg_session_ops.release_profiler_reservation = | 220 | gops->dbg_session_ops.release_profiler_reservation = |
190 | vgpu_release_profiler_reservation; | 221 | vgpu_release_profiler_reservation; |
222 | gops->dbg_session_ops.perfbuffer_enable = vgpu_perfbuffer_enable; | ||
223 | gops->dbg_session_ops.perfbuffer_disable = vgpu_perfbuffer_disable; | ||
191 | } | 224 | } |
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h index f0a809ba..eb510438 100644 --- a/include/linux/tegra_vgpu.h +++ b/include/linux/tegra_vgpu.h | |||
@@ -104,6 +104,7 @@ enum { | |||
104 | TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70, | 104 | TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70, |
105 | TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71, | 105 | TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71, |
106 | TEGRA_VGPU_CMD_PROF_MGT = 72, | 106 | TEGRA_VGPU_CMD_PROF_MGT = 72, |
107 | TEGRA_VGPU_CMD_PERFBUF_MGT = 73, | ||
107 | TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74, | 108 | TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74, |
108 | }; | 109 | }; |
109 | 110 | ||
@@ -488,6 +489,12 @@ struct tegra_vgpu_prof_mgt_params { | |||
488 | u32 mode; | 489 | u32 mode; |
489 | }; | 490 | }; |
490 | 491 | ||
492 | struct tegra_vgpu_perfbuf_mgt_params { | ||
493 | u64 vm_handle; | ||
494 | u64 offset; | ||
495 | u32 size; | ||
496 | }; | ||
497 | |||
491 | #define TEGRA_VGPU_GPU_FREQ_TABLE_SIZE 25 | 498 | #define TEGRA_VGPU_GPU_FREQ_TABLE_SIZE 25 |
492 | 499 | ||
493 | struct tegra_vgpu_get_gpu_freq_table_params { | 500 | struct tegra_vgpu_get_gpu_freq_table_params { |
@@ -545,6 +552,7 @@ struct tegra_vgpu_cmd_msg { | |||
545 | struct tegra_vgpu_suspend_resume_contexts resume_contexts; | 552 | struct tegra_vgpu_suspend_resume_contexts resume_contexts; |
546 | struct tegra_vgpu_clear_sm_error_state clear_sm_error_state; | 553 | struct tegra_vgpu_clear_sm_error_state clear_sm_error_state; |
547 | struct tegra_vgpu_prof_mgt_params prof_management; | 554 | struct tegra_vgpu_prof_mgt_params prof_management; |
555 | struct tegra_vgpu_perfbuf_mgt_params perfbuf_management; | ||
548 | struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper; | 556 | struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper; |
549 | struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table; | 557 | struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table; |
550 | char padding[192]; | 558 | char padding[192]; |