summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author: Sami Kiminki <skiminki@nvidia.com> 2014-10-24 13:40:57 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:12:07 -0400
commit: d11fbfe7b1b68b3aab93f7703896d95d40b79a58 (patch)
tree: a4b8574c9181284523efa5105878c2e3ef2e05fa /drivers/gpu/nvgpu/gk20a
parent: 2c5fdd1c8a76ef9ca21abcf894f2c9525d57fd49 (diff)
gpu: nvgpu: GPU characteristics additions
Add the following info into GPU characteristics: available big page sizes, support indicators for sync fence fds and cycle stats, gpc mask, SM version, SM SPA version and warp count, and IOCTL interface levels. Also, add new IOCTL to fetch TPC masks.

Bug 1551769
Bug 1558186

Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/562904
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c28
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c22
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c22
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h16
5 files changed, 90 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 93831844..3bcbdfd9 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
221 return err; 221 return err;
222} 222}
223 223
224static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
225 struct nvgpu_gpu_get_tpc_masks_args *args)
226{
227 struct gr_gk20a *gr = &g->gr;
228 int err = 0;
229 const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
230
231 if (args->mask_buf_size > 0) {
232 size_t write_size = gpc_tpc_mask_size;
233
234 if (write_size > args->mask_buf_size)
235 write_size = args->mask_buf_size;
236
237 err = copy_to_user((void __user *)(uintptr_t)
238 args->mask_buf_addr,
239 gr->gpc_tpc_mask, write_size);
240 }
241
242 if (err == 0)
243 args->mask_buf_size = gpc_tpc_mask_size;
244
245 return err;
246}
247
224long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 248long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
225{ 249{
226 struct platform_device *dev = filp->private_data; 250 struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
390 err = gk20a_ctrl_open_tsg(g, 414 err = gk20a_ctrl_open_tsg(g,
391 (struct nvgpu_gpu_open_tsg_args *)buf); 415 (struct nvgpu_gpu_open_tsg_args *)buf);
392 break; 416 break;
417 case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
418 err = gk20a_ctrl_get_tpc_masks(g,
419 (struct nvgpu_gpu_get_tpc_masks_args *)buf);
420 break;
393 default: 421 default:
394 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); 422 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
395 err = -ENOTTY; 423 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index ef0f6a8c..1bd1c898 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
1774 gpu->compression_page_size = g->mm.pmu.vm.compression_page_size; 1774 gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
1775 gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift; 1775 gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
1776 1776
1777 gpu->available_big_page_sizes = gpu->big_page_size;
1778 if (g->ops.mm.get_big_page_sizes)
1779 gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
1780
1777 gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS 1781 gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
1778 | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS; 1782 | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
1783 | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
1779 1784
1780 if (IS_ENABLED(CONFIG_TEGRA_GK20A) && 1785 if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
1781 gk20a_platform_has_syncpoints(g->dev)) 1786 gk20a_platform_has_syncpoints(g->dev))
1782 gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; 1787 gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
1783 1788
1789 if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
1790 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
1791
1792 gpu->gpc_mask = 1;
1793
1794 g->ops.gr.detect_sm_arch(g);
1795
1796 gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
1797 gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
1798 gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
1799 gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
1800 gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
1801
1802 gpu->gpu_va_bit_count = 40;
1803
1784 gpu->reserved = 0; 1804 gpu->reserved = 0;
1785 1805
1786 return 0; 1806 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a56614ab..3f070a58 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
137 struct gr_zcull_info *zcull_params); 137 struct gr_zcull_info *zcull_params);
138 bool (*is_tpc_addr)(u32 addr); 138 bool (*is_tpc_addr)(u32 addr);
139 u32 (*get_tpc_num)(u32 addr); 139 u32 (*get_tpc_num)(u32 addr);
140 void (*detect_sm_arch)(struct gk20a *g);
140 } gr; 141 } gr;
141 const char *name; 142 const char *name;
142 struct { 143 struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
304 void (*l2_flush)(struct gk20a *g, bool invalidate); 305 void (*l2_flush)(struct gk20a *g, bool invalidate);
305 void (*tlb_invalidate)(struct vm_gk20a *vm); 306 void (*tlb_invalidate)(struct vm_gk20a *vm);
306 void (*set_big_page_size)(struct gk20a *g, 307 void (*set_big_page_size)(struct gk20a *g,
307 void *inst_ptr, int size); 308 void *inst_ptr, int size);
309 u32 (*get_big_page_sizes)(void);
308 } mm; 310 } mm;
309 struct { 311 struct {
310 int (*prepare_ucode)(struct gk20a *g); 312 int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2c62c790..da257cd4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
3490 return 0; 3490 return 0;
3491} 3491}
3492 3492
3493static void gr_gk20a_detect_sm_arch(struct gk20a *g)
3494{
3495 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
3496
3497 u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
3498 u32 version = 0;
3499
3500 if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
3501 version = 0x320; /* SM 3.2 */
3502 else
3503 gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
3504 raw_version);
3505
3506 /* on Kepler, SM version == SPA version */
3507 g->gpu_characteristics.sm_arch_spa_version = version;
3508 g->gpu_characteristics.sm_arch_sm_version = version;
3509
3510 g->gpu_characteristics.sm_arch_warp_count =
3511 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
3512}
3513
3493static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, 3514static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
3494 struct zbc_entry *color_val, u32 index) 3515 struct zbc_entry *color_val, u32 index)
3495{ 3516{
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7328 gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; 7349 gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
7329 gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; 7350 gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
7330 gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; 7351 gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
7352 gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
7331} 7353}
7332 7354
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 3b16df58..f89bb2a4 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
1886{ 1886{
1887 return (v & 0xffff) << 0; 1887 return (v & 0xffff) << 0;
1888} 1888}
1889static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
1890{
1891 return 0x0050469c;
1892}
1893static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
1894{
1895 return (r >> 0) & 0xff;
1896}
1897static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
1898{
1899 return (r >> 8) & 0xf;
1900}
1901static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
1902{
1903 return 0x0000000c;
1904}
1889static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void) 1905static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
1890{ 1906{
1891 return 0x00503018; 1907 return 0x00503018;