diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2014-10-24 13:40:57 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:12:07 -0400 |
commit | d11fbfe7b1b68b3aab93f7703896d95d40b79a58 (patch) | |
tree | a4b8574c9181284523efa5105878c2e3ef2e05fa /drivers/gpu/nvgpu/gk20a | |
parent | 2c5fdd1c8a76ef9ca21abcf894f2c9525d57fd49 (diff) |
gpu: nvgpu: GPU characteristics additions
Add the following info into GPU characteristics: available big page
sizes, support indicators for sync fence fds and cycle stats, gpc
mask, SM version, SM SPA version and warp count, and IOCTL interface
levels. Also, add new IOCTL to fetch TPC masks.
Bug 1551769
Bug 1558186
Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/562904
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | 16 |
5 files changed, 90 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 93831844..3bcbdfd9 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -221,6 +221,30 @@ clean_up: | |||
221 | return err; | 221 | return err; |
222 | } | 222 | } |
223 | 223 | ||
224 | static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, | ||
225 | struct nvgpu_gpu_get_tpc_masks_args *args) | ||
226 | { | ||
227 | struct gr_gk20a *gr = &g->gr; | ||
228 | int err = 0; | ||
229 | const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; | ||
230 | |||
231 | if (args->mask_buf_size > 0) { | ||
232 | size_t write_size = gpc_tpc_mask_size; | ||
233 | |||
234 | if (write_size > args->mask_buf_size) | ||
235 | write_size = args->mask_buf_size; | ||
236 | |||
237 | err = copy_to_user((void __user *)(uintptr_t) | ||
238 | args->mask_buf_addr, | ||
239 | gr->gpc_tpc_mask, write_size); | ||
240 | } | ||
241 | |||
242 | if (err == 0) | ||
243 | args->mask_buf_size = gpc_tpc_mask_size; | ||
244 | |||
245 | return err; | ||
246 | } | ||
247 | |||
224 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 248 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
225 | { | 249 | { |
226 | struct platform_device *dev = filp->private_data; | 250 | struct platform_device *dev = filp->private_data; |
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
390 | err = gk20a_ctrl_open_tsg(g, | 414 | err = gk20a_ctrl_open_tsg(g, |
391 | (struct nvgpu_gpu_open_tsg_args *)buf); | 415 | (struct nvgpu_gpu_open_tsg_args *)buf); |
392 | break; | 416 | break; |
417 | case NVGPU_GPU_IOCTL_GET_TPC_MASKS: | ||
418 | err = gk20a_ctrl_get_tpc_masks(g, | ||
419 | (struct nvgpu_gpu_get_tpc_masks_args *)buf); | ||
420 | break; | ||
393 | default: | 421 | default: |
394 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); | 422 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); |
395 | err = -ENOTTY; | 423 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index ef0f6a8c..1bd1c898 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
1774 | gpu->compression_page_size = g->mm.pmu.vm.compression_page_size; | 1774 | gpu->compression_page_size = g->mm.pmu.vm.compression_page_size; |
1775 | gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift; | 1775 | gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift; |
1776 | 1776 | ||
1777 | gpu->available_big_page_sizes = gpu->big_page_size; | ||
1778 | if (g->ops.mm.get_big_page_sizes) | ||
1779 | gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes(); | ||
1780 | |||
1777 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS | 1781 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS |
1778 | | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS; | 1782 | | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS |
1783 | | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; | ||
1779 | 1784 | ||
1780 | if (IS_ENABLED(CONFIG_TEGRA_GK20A) && | 1785 | if (IS_ENABLED(CONFIG_TEGRA_GK20A) && |
1781 | gk20a_platform_has_syncpoints(g->dev)) | 1786 | gk20a_platform_has_syncpoints(g->dev)) |
1782 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; | 1787 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; |
1783 | 1788 | ||
1789 | if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS)) | ||
1790 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; | ||
1791 | |||
1792 | gpu->gpc_mask = 1; | ||
1793 | |||
1794 | g->ops.gr.detect_sm_arch(g); | ||
1795 | |||
1796 | gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; | ||
1797 | gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; | ||
1798 | gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; | ||
1799 | gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; | ||
1800 | gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; | ||
1801 | |||
1802 | gpu->gpu_va_bit_count = 40; | ||
1803 | |||
1784 | gpu->reserved = 0; | 1804 | gpu->reserved = 0; |
1785 | 1805 | ||
1786 | return 0; | 1806 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a56614ab..3f070a58 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -137,6 +137,7 @@ struct gpu_ops { | |||
137 | struct gr_zcull_info *zcull_params); | 137 | struct gr_zcull_info *zcull_params); |
138 | bool (*is_tpc_addr)(u32 addr); | 138 | bool (*is_tpc_addr)(u32 addr); |
139 | u32 (*get_tpc_num)(u32 addr); | 139 | u32 (*get_tpc_num)(u32 addr); |
140 | void (*detect_sm_arch)(struct gk20a *g); | ||
140 | } gr; | 141 | } gr; |
141 | const char *name; | 142 | const char *name; |
142 | struct { | 143 | struct { |
@@ -304,7 +305,8 @@ struct gpu_ops { | |||
304 | void (*l2_flush)(struct gk20a *g, bool invalidate); | 305 | void (*l2_flush)(struct gk20a *g, bool invalidate); |
305 | void (*tlb_invalidate)(struct vm_gk20a *vm); | 306 | void (*tlb_invalidate)(struct vm_gk20a *vm); |
306 | void (*set_big_page_size)(struct gk20a *g, | 307 | void (*set_big_page_size)(struct gk20a *g, |
307 | void *inst_ptr, int size); | 308 | void *inst_ptr, int size); |
309 | u32 (*get_big_page_sizes)(void); | ||
308 | } mm; | 310 | } mm; |
309 | struct { | 311 | struct { |
310 | int (*prepare_ucode)(struct gk20a *g); | 312 | int (*prepare_ucode)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 2c62c790..da257cd4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | |||
3490 | return 0; | 3490 | return 0; |
3491 | } | 3491 | } |
3492 | 3492 | ||
3493 | static void gr_gk20a_detect_sm_arch(struct gk20a *g) | ||
3494 | { | ||
3495 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
3496 | |||
3497 | u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
3498 | u32 version = 0; | ||
3499 | |||
3500 | if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v()) | ||
3501 | version = 0x320; /* SM 3.2 */ | ||
3502 | else | ||
3503 | gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n", | ||
3504 | raw_version); | ||
3505 | |||
3506 | /* on Kepler, SM version == SPA version */ | ||
3507 | g->gpu_characteristics.sm_arch_spa_version = version; | ||
3508 | g->gpu_characteristics.sm_arch_sm_version = version; | ||
3509 | |||
3510 | g->gpu_characteristics.sm_arch_warp_count = | ||
3511 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
3512 | } | ||
3513 | |||
3493 | static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, | 3514 | static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, |
3494 | struct zbc_entry *color_val, u32 index) | 3515 | struct zbc_entry *color_val, u32 index) |
3495 | { | 3516 | { |
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7328 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; | 7349 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; |
7329 | gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; | 7350 | gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; |
7330 | gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; | 7351 | gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; |
7352 | gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch; | ||
7331 | } | 7353 | } |
7332 | 7354 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 3b16df58..f89bb2a4 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | |||
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v) | |||
1886 | { | 1886 | { |
1887 | return (v & 0xffff) << 0; | 1887 | return (v & 0xffff) << 0; |
1888 | } | 1888 | } |
1889 | static inline u32 gr_gpc0_tpc0_sm_arch_r(void) | ||
1890 | { | ||
1891 | return 0x0050469c; | ||
1892 | } | ||
1893 | static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r) | ||
1894 | { | ||
1895 | return (r >> 0) & 0xff; | ||
1896 | } | ||
1897 | static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r) | ||
1898 | { | ||
1899 | return (r >> 8) & 0xf; | ||
1900 | } | ||
1901 | static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void) | ||
1902 | { | ||
1903 | return 0x0000000c; | ||
1904 | } | ||
1889 | static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void) | 1905 | static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void) |
1890 | { | 1906 | { |
1891 | return 0x00503018; | 1907 | return 0x00503018; |