From a21e56d584641202327f64741b06b1cd9633d0f6 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Thu, 31 Mar 2016 14:57:20 -0700
Subject: gpu: nvgpu: vgpu: add support for VSM ioctls

Add virtualized support for NUM_VSMS and VSMS_MAPPING ioctls.

This requires adding a new attribute request for the RM server: GPC0_TPC_COUNT.

JIRASW EVLR-253

Change-Id: Icaab4fadbbc9eab5d00cf78132928686944162df
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1130615
(cherry picked from commit 78514079382b0de48457db340e3479e99a012040)
Reviewed-on: http://git-master/r/1133865
(cherry picked from commit 27a8e645e2787a43d0073f0be6e8f64c0f183228)
Reviewed-on: http://git-master/r/1122553
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c | 30 ++++++++++++++++-
 drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c | 25 ++++++++++++++-
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c             | 48 +++++++++++++++++++++++++---
 include/linux/tegra_vgpu.h                   |  3 +-
 4 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
index 0a2ca743..2cfe16da 100644
--- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -45,7 +45,35 @@ static void vgpu_gk20a_detect_sm_arch(struct gk20a *g)
 		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
 }
 
+/*
+ * Number the SMs TPC-index-major: TPC 0 of every GPC first, then TPC 1 of
+ * every GPC, and so on.  Fills sm_to_cluster[], sets no_of_sm, returns 0.
+ */
+static int vgpu_gk20a_init_fs_state(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc, gpc, num_sm = 0;
+
+	gk20a_dbg_fn("");
+
+	for (tpc = 0; tpc < gr->max_tpc_per_gpc_count; tpc++) {
+		for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+			/* This GPC has no TPC at this index. */
+			if (tpc >= gr->gpc_tpc_count[gpc])
+				continue;
+
+			gr->sm_to_cluster[num_sm].tpc_index = tpc;
+			gr->sm_to_cluster[num_sm].gpc_index = gpc;
+			num_sm++;
+		}
+	}
+
+	gr->no_of_sm = num_sm;
+	return 0;
+}
+
 void vgpu_gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.detect_sm_arch = vgpu_gk20a_detect_sm_arch;
+	gops->gr.init_fs_state = vgpu_gk20a_init_fs_state;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
index 07d41b2e..fb1f31d8 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -37,7 +37,30 @@ static void vgpu_gm20b_detect_sm_arch(struct gk20a *g)
 		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
 }
 
+/*
+ * Fill sm_to_cluster[] GPC-major and store the SM total in no_of_sm.
+ */
+static int vgpu_gm20b_init_fs_state(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc, num_sm = 0;
+
+	gk20a_dbg_fn("");
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+		for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
+			gr->sm_to_cluster[num_sm].tpc_index = tpc;
+			gr->sm_to_cluster[num_sm].gpc_index = gpc;
+			num_sm++;
+		}
+	}
+
+	gr->no_of_sm = num_sm;
+	return 0;
+}
+
 void vgpu_gm20b_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.detect_sm_arch = vgpu_gm20b_detect_sm_arch;
+	gops->gr.init_fs_state = vgpu_gm20b_init_fs_state;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 16d51ad3..420c714e 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -567,6 +567,19 @@ static int vgpu_gr_free_obj_ctx(struct channel_gk20a  *c,
 	return 0;
 }
 
+/* Fetch the GPC0 TPC count attribute; gpc_index is expected to be 0. */
+static u32 vgpu_gr_get_gpc_tpc_count(struct gk20a *g, u32 gpc_index)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	u32 data = 0; /* defined value even if the query fails */
+
+	WARN_ON(gpc_index > 0);
+	if (vgpu_get_attribute(platform->virt_handle,
+			TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT, &data))
+		gk20a_err(dev_from_gk20a(g), "failed to retrieve gpc0_tpc_count");
+	return data;
+}
+
 static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
@@ -593,13 +606,23 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			&gr->tpc_count))
 		return -ENOMEM;
 
+	gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+	if (!gr->gpc_tpc_count)
+		goto cleanup;
+
 	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	if (!gr->gpc_tpc_mask) {
-		gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
-		return -ENOMEM;
-	}
+	if (!gr->gpc_tpc_mask)
+		goto cleanup;
+
+	gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->max_tpc_per_gpc_count *
+				sizeof(struct sm_info), GFP_KERNEL);
+	if (!gr->sm_to_cluster)
+		goto cleanup;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		gr->gpc_tpc_count[gpc_index] =
+			vgpu_gr_get_gpc_tpc_count(g, gpc_index);
+
 		if (g->ops.gr.get_gpc_tpc_mask)
 			gr->gpc_tpc_mask[gpc_index] =
 				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
@@ -608,7 +631,18 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	g->ops.gr.bundle_cb_defaults(g);
 	g->ops.gr.cb_size_default(g);
 	g->ops.gr.calc_global_ctx_buffer_size(g);
+	g->ops.gr.init_fs_state(g);
 	return 0;
+cleanup:
+	gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
+
+	kfree(gr->gpc_tpc_count);
+	gr->gpc_tpc_count = NULL;
+
+	kfree(gr->gpc_tpc_mask);
+	gr->gpc_tpc_mask = NULL;
+
+	return -ENOMEM;
 }
 
 static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
@@ -823,6 +857,12 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)
 
 	kfree(gr->gpc_tpc_mask);
 	gr->gpc_tpc_mask = NULL;
+
+	kfree(gr->sm_to_cluster);
+	gr->sm_to_cluster = NULL;
+
+	kfree(gr->gpc_tpc_count);
+	gr->gpc_tpc_count = NULL;
 }
 
 static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index 67bd0d76..d517fabc 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -117,7 +117,8 @@ enum {
 	TEGRA_VGPU_ATTRIB_COMPTAGS_PER_CACHELINE,
 	TEGRA_VGPU_ATTRIB_SLICES_PER_LTC,
 	TEGRA_VGPU_ATTRIB_LTC_COUNT,
-	TEGRA_VGPU_ATTRIB_TPC_COUNT
+	TEGRA_VGPU_ATTRIB_TPC_COUNT,
+	TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT,
 };
 
 struct tegra_vgpu_attrib_params {
-- 
cgit v1.2.2