From d11fbfe7b1b68b3aab93f7703896d95d40b79a58 Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Fri, 24 Oct 2014 20:40:57 +0300
Subject: gpu: nvgpu: GPU characteristics additions

Add the following info to the GPU characteristics: available big page
sizes, support indicators for sync fence fds and cycle stats, GPC
mask, SM version, SM SPA version and warp count, and IOCTL interface
levels. Also, add a new IOCTL to fetch TPC masks.

Bug 1551769
Bug 1558186

Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/562904
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  | 28 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gk20a.c       | 22 +++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/gk20a.h       |  4 +++-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c    | 22 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | 16 ++++++++++++++++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c    | 13 +++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 16 ++++++++++++++++
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c    |  6 ++++++
 8 files changed, 125 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 93831844..3bcbdfd9 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
 	return err;
 }
 
+/* Copy per-GPC TPC masks to user space; report the full size needed. */
+static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
+				    struct nvgpu_gpu_get_tpc_masks_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
+
+	if (args->mask_buf_size > 0) {
+		size_t write_size = gpc_tpc_mask_size;
+
+		if (write_size > args->mask_buf_size)
+			write_size = args->mask_buf_size;
+
+		/* copy_to_user() returns bytes left uncopied, not an errno */
+		if (copy_to_user((void __user *)(uintptr_t)
+				 args->mask_buf_addr,
+				 gr->gpc_tpc_mask, write_size))
+			return -EFAULT;
+	}
+
+	args->mask_buf_size = gpc_tpc_mask_size;
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_ctrl_open_tsg(g,
 			(struct nvgpu_gpu_open_tsg_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
+		err = gk20a_ctrl_get_tpc_masks(g,
+			(struct nvgpu_gpu_get_tpc_masks_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index ef0f6a8c..1bd1c898 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
 	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 
+	gpu->available_big_page_sizes = gpu->big_page_size;
+	if (g->ops.mm.get_big_page_sizes)
+		gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
+		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
 
 	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
 	    gk20a_platform_has_syncpoints(g->dev))
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
+	if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
+		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+
+	gpu->gpc_mask = 1;
+
+	g->ops.gr.detect_sm_arch(g);
+
+	gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
+	gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
+	gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
+	gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
+	gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
+
+	gpu->gpu_va_bit_count = 40;
+
 	gpu->reserved = 0;
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a56614ab..3f070a58 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
 				struct gr_zcull_info *zcull_params);
 		bool (*is_tpc_addr)(u32 addr);
 		u32 (*get_tpc_num)(u32 addr);
+		void (*detect_sm_arch)(struct gk20a *g);
 	} gr;
 	const char *name;
 	struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
 		void (*l2_flush)(struct gk20a *g, bool invalidate);
 		void (*tlb_invalidate)(struct vm_gk20a *vm);
 		void (*set_big_page_size)(struct gk20a *g,
-					 void *inst_ptr, int size);
+					  void *inst_ptr, int size);
+		u32 (*get_big_page_sizes)(void);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2c62c790..da257cd4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
+/* Read the SM arch register; publish SM/SPA version and warp count. */
+static void gr_gk20a_detect_sm_arch(struct gk20a *g)
+{
+	const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+	const u32 spa_raw = gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
+	u32 sm_version = 0x320; /* SM 3.2, the only encoding known here */
+
+	if (spa_raw != gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v()) {
+		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
+			  spa_raw);
+		sm_version = 0; /* unknown hardware is reported as 0 */
+	}
+
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
+
+	/* on Kepler, SM version == SPA version */
+	g->gpu_characteristics.sm_arch_spa_version = sm_version;
+	g->gpu_characteristics.sm_arch_sm_version = sm_version;
+}
+
 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index)
 {
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 3b16df58..f89bb2a4 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c; /* register location given to gk20a_readl() */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* bits 7:0 of the register value */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xf; /* bits 11:8 of the register value */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
+{
+	return 0x0000000c; /* spa_version field value reported as SM 3.2 */
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d40e9d52..8f056181 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -748,6 +748,18 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
 
 #endif
 
+static void gr_gm20b_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); /* read once */
+
+	g->gpu_characteristics.sm_arch_spa_version =
+		gr_gpc0_tpc0_sm_arch_spa_version_v(v); /* field value as-is */
+	g->gpu_characteristics.sm_arch_sm_version =
+		gr_gpc0_tpc0_sm_arch_sm_version_v(v); /* field value as-is */
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -781,4 +793,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 0dae5896..8e4308a3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -1878,6 +1878,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c; /* register location given to gk20a_readl() */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* bits 7:0 of the register value */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xfff; /* bits 19:8 of the register value */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
+{
+	return (r >> 20) & 0xfff; /* bits 31:20 of the register value */
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 030701b9..678ef4fd 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -280,6 +280,11 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
 	gk20a_dbg_fn("done");
 }
 
+static u32 gm20b_mm_get_big_page_sizes(void)
+{
+	return SZ_64K | SZ_128K; /* bitmask of supported big page sizes */
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -295,4 +300,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
+	gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
 }
-- 
cgit v1.2.2