gpu: nvgpu: GPU characteristics additions

Add the following info into GPU characteristics: available big page sizes, support indicators for sync fence fds and cycle stats, gpc mask, SM version, SM SPA version and warp count, and IOCTL interface levels. Also, add new IOCTL to fetch TPC masks. Bug 1551769 Bug 1558186 Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de Signed-off-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-on: http://git-master/r/562904 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Sami Kiminki <skiminki@nvidia.com> 2014-10-24 13:40:57 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:12:07 -0400
commit: d11fbfe7b1b68b3aab93f7703896d95d40b79a58 (patch)
tree: a4b8574c9181284523efa5105878c2e3ef2e05fa /drivers/gpu/nvgpu/gk20a
parent: 2c5fdd1c8a76ef9ca21abcf894f2c9525d57fd49 (diff)
5 files changed, 90 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 93831844..3bcbdfd9 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
        return err;
 }
+/*
+ * Handle NVGPU_GPU_IOCTL_GET_TPC_MASKS.
+ *
+ * Copies up to args->mask_buf_size bytes of gr->gpc_tpc_mask to the user
+ * buffer at args->mask_buf_addr. The full table size is
+ * sizeof(u32) * gr->gpc_count (presumably one u32 mask per GPC); the copy is
+ * truncated to the caller's buffer size when that is smaller. When
+ * args->mask_buf_size is 0 nothing is copied, so the call can be used to
+ * query the required size.
+ *
+ * On success args->mask_buf_size is overwritten with the full table size.
+ *
+ * Returns 0 on success, or -EFAULT if the user buffer could not be written.
+ */
+static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
+                                    struct nvgpu_gpu_get_tpc_masks_args *args)
+{
+        struct gr_gk20a *gr = &g->gr;
+        const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
+        if (args->mask_buf_size > 0) {
+                size_t write_size = gpc_tpc_mask_size;
+                if (write_size > args->mask_buf_size)
+                        write_size = args->mask_buf_size;
+                /*
+                 * copy_to_user() returns the number of bytes that could NOT
+                 * be copied, not an errno; translate any shortfall into
+                 * -EFAULT instead of leaking a positive count to the caller.
+                 */
+                if (copy_to_user((void __user *)(uintptr_t)
+                                 args->mask_buf_addr,
+                                 gr->gpc_tpc_mask, write_size))
+                        return -EFAULT;
+        }
+        /* report the full table size back so the caller can size its buffer */
+        args->mask_buf_size = gpc_tpc_mask_size;
+        return 0;
+}
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
                err = gk20a_ctrl_open_tsg(g,
                        (struct nvgpu_gpu_open_tsg_args *)buf);
                break;
+        case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
+                err = gk20a_ctrl_get_tpc_masks(g,
+                        (struct nvgpu_gpu_get_tpc_masks_args *)buf);
+                break;
        default:
                dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
                err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index ef0f6a8c..1bd1c898 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
        gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
        gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
+        gpu->available_big_page_sizes = gpu->big_page_size;
+        if (g->ops.mm.get_big_page_sizes)
+                gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
        gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-                | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+                | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
+                | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
        if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
            gk20a_platform_has_syncpoints(g->dev))
                gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
+        if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
+                gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+        gpu->gpc_mask = 1;
+        g->ops.gr.detect_sm_arch(g);
+        gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
+        gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
+        gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
+        gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
+        gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
+        gpu->gpu_va_bit_count = 40;
        gpu->reserved = 0;
        return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a56614ab..3f070a58 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
                                struct gr_zcull_info *zcull_params);
                bool (*is_tpc_addr)(u32 addr);
                u32 (*get_tpc_num)(u32 addr);
+                void (*detect_sm_arch)(struct gk20a *g);
        } gr;
        const char *name;
        struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
                void (*l2_flush)(struct gk20a *g, bool invalidate);
                void (*tlb_invalidate)(struct vm_gk20a *vm);
                void (*set_big_page_size)(struct gk20a *g,
-                                         void *inst_ptr, int size);
+                                          void *inst_ptr, int size);
+                u32 (*get_big_page_sizes)(void);
        } mm;
        struct {
                int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2c62c790..da257cd4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
        return 0;
 }
+/*
+ * Read the GPC0/TPC0 SM_ARCH register and record the SM architecture in
+ * g->gpu_characteristics: SPA version, SM version and warp count.
+ *
+ * Only the Kepler LP SPA encoding is recognised (reported as 0x320). Any
+ * other encoding is logged as an error and both version fields are set to 0.
+ */
+static void gr_gk20a_detect_sm_arch(struct gk20a *g)
+{
+        const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+        const u32 spa_raw = gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
+        u32 sm_version;
+        if (spa_raw != gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v()) {
+                gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
+                          spa_raw);
+                sm_version = 0;
+        } else {
+                sm_version = 0x320; /* SM 3.2 */
+        }
+        g->gpu_characteristics.sm_arch_warp_count =
+                gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
+        /* Kepler reports one value for both the SPA and the SM version */
+        g->gpu_characteristics.sm_arch_sm_version = sm_version;
+        g->gpu_characteristics.sm_arch_spa_version = sm_version;
+}
 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
                                  struct zbc_entry *color_val, u32 index)
 {
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
        gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
        gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
+        gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 3b16df58..f89bb2a4 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
        return (v & 0xffff) << 0;
 }
+/* Offset of the GPC0/TPC0 SM_ARCH register, as passed to gk20a_readl(). */
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+        return 0x0050469c;
+}
+/* Extract the warp count field (bits 7:0) from an SM_ARCH register value. */
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+        return (r >> 0) & 0xff;
+}
+/* Extract the SPA version field (bits 11:8) from an SM_ARCH register value. */
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+        return (r >> 8) & 0xf;
+}
+/*
+ * SPA version field value for the Kepler LP SM (0xc); compare against the
+ * result of gr_gpc0_tpc0_sm_arch_spa_version_v().
+ */
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
+{
+        return 0x0000000c;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
        return 0x00503018;
author	Sami Kiminki <skiminki@nvidia.com>	2014-10-24 13:40:57 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:12:07 -0400
commit	d11fbfe7b1b68b3aab93f7703896d95d40b79a58 (patch)
tree	a4b8574c9181284523efa5105878c2e3ef2e05fa /drivers/gpu/nvgpu/gk20a
parent	2c5fdd1c8a76ef9ca21abcf894f2c9525d57fd49 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 93831844..3bcbdfd9 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
221	return err;	221	return err;
222	}	222	}
223		223
		224	static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
		225	struct nvgpu_gpu_get_tpc_masks_args *args)
		226	{
		227	struct gr_gk20a *gr = &g->gr;
		228	int err = 0;
		229	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
		230
		231	if (args->mask_buf_size > 0) {
		232	size_t write_size = gpc_tpc_mask_size;
		233
		234	if (write_size > args->mask_buf_size)
		235	write_size = args->mask_buf_size;
		236
		237	err = copy_to_user((void __user *)(uintptr_t)
		238	args->mask_buf_addr,
		239	gr->gpc_tpc_mask, write_size);
		240	}
		241
		242	if (err == 0)
		243	args->mask_buf_size = gpc_tpc_mask_size;
		244
		245	return err;
		246	}
		247
224	long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)	248	long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
225	{	249	{
226	struct platform_device *dev = filp->private_data;	250	struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
390	err = gk20a_ctrl_open_tsg(g,	414	err = gk20a_ctrl_open_tsg(g,
391	(struct nvgpu_gpu_open_tsg_args *)buf);	415	(struct nvgpu_gpu_open_tsg_args *)buf);
392	break;	416	break;
		417	case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
		418	err = gk20a_ctrl_get_tpc_masks(g,
		419	(struct nvgpu_gpu_get_tpc_masks_args *)buf);
		420	break;
393	default:	421	default:
394	dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);	422	dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
395	err = -ENOTTY;	423	err = -ENOTTY;


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index ef0f6a8c..1bd1c898 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
1774	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;	1774	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
1775	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;	1775	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
1776		1776
		1777	gpu->available_big_page_sizes = gpu->big_page_size;
		1778	if (g->ops.mm.get_big_page_sizes)
		1779	gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
		1780
1777	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS	1781	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
1778	| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;	1782	| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
		1783	| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
1779		1784
1780	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&	1785	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
1781	gk20a_platform_has_syncpoints(g->dev))	1786	gk20a_platform_has_syncpoints(g->dev))
1782	gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;	1787	gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
1783		1788
		1789	if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
		1790	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
		1791
		1792	gpu->gpc_mask = 1;
		1793
		1794	g->ops.gr.detect_sm_arch(g);
		1795
		1796	gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
		1797	gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
		1798	gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
		1799	gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
		1800	gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
		1801
		1802	gpu->gpu_va_bit_count = 40;
		1803
1784	gpu->reserved = 0;	1804	gpu->reserved = 0;
1785		1805
1786	return 0;	1806	return 0;


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a56614ab..3f070a58 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
137	struct gr_zcull_info *zcull_params);	137	struct gr_zcull_info *zcull_params);
138	bool (*is_tpc_addr)(u32 addr);	138	bool (*is_tpc_addr)(u32 addr);
139	u32 (*get_tpc_num)(u32 addr);	139	u32 (*get_tpc_num)(u32 addr);
		140	void (*detect_sm_arch)(struct gk20a *g);
140	} gr;	141	} gr;
141	const char *name;	142	const char *name;
142	struct {	143	struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
304	void (*l2_flush)(struct gk20a *g, bool invalidate);	305	void (*l2_flush)(struct gk20a *g, bool invalidate);
305	void (*tlb_invalidate)(struct vm_gk20a *vm);	306	void (*tlb_invalidate)(struct vm_gk20a *vm);
306	void (*set_big_page_size)(struct gk20a *g,	307	void (*set_big_page_size)(struct gk20a *g,
307	void *inst_ptr, int size);	308	void *inst_ptr, int size);
		309	u32 (*get_big_page_sizes)(void);
308	} mm;	310	} mm;
309	struct {	311	struct {
310	int (*prepare_ucode)(struct gk20a *g);	312	int (*prepare_ucode)(struct gk20a *g);


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 2c62c790..da257cd4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
3490	return 0;	3490	return 0;
3491	}	3491	}
3492		3492
		3493	static void gr_gk20a_detect_sm_arch(struct gk20a *g)
		3494	{
		3495	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
		3496
		3497	u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
		3498	u32 version = 0;
		3499
		3500	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
		3501	version = 0x320; /* SM 3.2 */
		3502	else
		3503	gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
		3504	raw_version);
		3505
		3506	/* on Kepler, SM version == SPA version */
		3507	g->gpu_characteristics.sm_arch_spa_version = version;
		3508	g->gpu_characteristics.sm_arch_sm_version = version;
		3509
		3510	g->gpu_characteristics.sm_arch_warp_count =
		3511	gr_gpc0_tpc0_sm_arch_warp_count_v(v);
		3512	}
		3513
3493	static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,	3514	static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
3494	struct zbc_entry *color_val, u32 index)	3515	struct zbc_entry *color_val, u32 index)
3495	{	3516	{
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7328	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;	7349	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
7329	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;	7350	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
7330	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;	7351	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
		7352	gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
7331	}	7353	}
7332		7354


diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 3b16df58..f89bb2a4 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
1886	{	1886	{
1887	return (v & 0xffff) << 0;	1887	return (v & 0xffff) << 0;
1888	}	1888	}
		1889	static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
		1890	{
		1891	return 0x0050469c;
		1892	}
		1893	static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
		1894	{
		1895	return (r >> 0) & 0xff;
		1896	}
		1897	static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
		1898	{
		1899	return (r >> 8) & 0xf;
		1900	}
		1901	static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
		1902	{
		1903	return 0x0000000c;
		1904	}
1889	static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)	1905	static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
1890	{	1906	{
1891	return 0x00503018;	1907	return 0x00503018;