From 34732a14b22f09d8f9d52f756612178f0313f120 Mon Sep 17 00:00:00 2001
From: Deepak Goyal <dgoyal@nvidia.com>
Date: Fri, 14 Sep 2018 11:45:19 +0530
Subject: nvgpu: gpu: Support multiple tpc-pg masks.

- TPC powergating should be done before calling
  gk20a_enable_gr_hw(), since gk20a_enable_gr_hw()
  issues a GR engine reset.

  Without this fix, enabling 1 TPC from each PES causes
  a ctxsw timeout error while running GFX Benchmark.

- Adds valid tpc-pg masks for 1/2/3/4 active TPC configs.
  TPC Config          - TPC-MASK
  4 TPC configuration - 0x0
  3 TPC configuration - 0x1/0x2/0x4/0x8
  2 TPC configuration - 0x5/0x9/0x6/0xa

- We should not write to gr_fe_tpc_pesmask_r()
  as part of the TPC-PG sequence. This register is
  for debug purposes only.

Bug 200442360

Change-Id: I6fbe1ad8fbc836ace8cbaf00ec3d21a12c73e0bd
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1809772
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.c                   | 30 +++++++++++++++++------
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c                | 21 ----------------
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h           |  4 ++-
 drivers/gpu/nvgpu/os/linux/driver_common.c        |  6 ++++-
 drivers/gpu/nvgpu/os/linux/platform_gk20a.h       |  2 +-
 drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c |  4 ++-
 drivers/gpu/nvgpu/os/linux/sysfs.c                | 15 ++++++++++--
 7 files changed, 47 insertions(+), 35 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9958d24f..1cad8dcb 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -259,9 +259,28 @@ int gk20a_finalize_poweron(struct gk20a *g)
 
 	g->ops.mc.intr_enable(g);
 
+	/*
+	 *  Force can_tpc_powergate to false if the chip is ES fused and
+	 *  some of its TPCs are already floorswept via fuse.
+	 *  TPC-PG is not supported in those cases.
+	 */
+
+	if (g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0) != 0x0) {
+		g->can_tpc_powergate = false;
+		g->tpc_pg_mask = 0x0;
+	}
+
+	nvgpu_mutex_acquire(&g->tpc_pg_lock);
+
+	if (g->can_tpc_powergate) {
+		if (g->ops.gr.powergate_tpc != NULL)
+			g->ops.gr.powergate_tpc(g);
+	}
+
 	err = gk20a_enable_gr_hw(g);
 	if (err) {
 		nvgpu_err(g, "failed to enable gr");
+		nvgpu_mutex_release(&g->tpc_pg_lock);
 		goto done;
 	}
 
@@ -271,6 +290,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 		if (err) {
 			nvgpu_err(g, "failed to init pmu ucode");
+			nvgpu_mutex_release(&g->tpc_pg_lock);
 			goto done;
 		}
 	}
@@ -279,6 +299,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		err = gk20a_init_pstate_support(g);
 		if (err) {
 			nvgpu_err(g, "failed to init pstates");
+			nvgpu_mutex_release(&g->tpc_pg_lock);
 			goto done;
 		}
 	}
@@ -296,18 +317,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		err = nvgpu_init_pmu_support(g);
 		if (err) {
 			nvgpu_err(g, "failed to init gk20a pmu");
+			nvgpu_mutex_release(&g->tpc_pg_lock);
 			goto done;
 		}
 	}
 
-	nvgpu_mutex_acquire(&g->tpc_pg_lock);
-
-	if (g->can_tpc_powergate) {
-		if (g->ops.gr.powergate_tpc != NULL) {
-			g->ops.gr.powergate_tpc(g);
-		}
-	}
-
 	err = gk20a_init_gr_support(g);
 	if (err) {
 		nvgpu_err(g, "failed to init gk20a gr");
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 3dedc6b5..288bd583 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -73,16 +73,6 @@
  */
 #define GR_TPCS_INFO_FOR_MAPREGISTER 6
 
-/*
- * There are 4 TPCs in GV11b ranging from TPC0 to TPC3
- * There are two PES in GV11b each controlling two TPCs
- * PES0 is linked to TPC0 & TPC2
- * PES1 is linked to TPC1 & TPC3
- */
-#define TPC_MASK_FOR_PESID_0   (u32) 0x5
-#define TPC_MASK_FOR_PESID_1   (u32) 0xa
-
-
 bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
@@ -143,17 +133,6 @@ void gr_gv11b_powergate_tpc(struct gk20a *g)
 		tpc_pg_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0);
 	} while (tpc_pg_status != g->tpc_pg_mask);
 
-	gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
-			gr_fe_tpc_pesmask_action_write_f() |
-			gr_fe_tpc_pesmask_pesid_f(0) |
-			gr_fe_tpc_pesmask_gpcid_f(0) |
-			((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0));
-	gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
-			gr_fe_tpc_pesmask_action_write_f() |
-			gr_fe_tpc_pesmask_pesid_f(1) |
-			gr_fe_tpc_pesmask_gpcid_f(0) |
-			((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1));
-
 	return;
 }
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 2ebe0011..8d7ccfa8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -138,6 +138,8 @@ enum gk20a_cbc_op {
 
 #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v)
 
+#define MAX_TPC_PG_CONFIGS      3
+
 enum nvgpu_unit;
 
 enum nvgpu_flush_op;
@@ -1592,7 +1594,7 @@ struct gk20a {
 	u32 tpc_pg_mask;
 	bool can_tpc_powergate;
 
-	u32 valid_tpc_mask;
+	u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS];
 
 	struct nvgpu_bios bios;
 	bool bios_is_init;
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index 9ff32d68..539f0559 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -135,6 +135,7 @@ static void nvgpu_init_timeslice(struct gk20a *g)
 static void nvgpu_init_pm_vars(struct gk20a *g)
 {
 	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
+	u32 i = 0;
 
 	/*
 	 * Set up initial power settings. For non-slicon platforms, disable
@@ -172,7 +173,10 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
 	g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
 	__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
 	g->can_tpc_powergate = platform->can_tpc_powergate;
-	g->valid_tpc_mask = platform->valid_tpc_mask;
+
+	for (i = 0; i < MAX_TPC_PG_CONFIGS; i++)
+		g->valid_tpc_mask[i] = platform->valid_tpc_mask[i];
+
 	g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
 	/* if default delay is not set, set default delay to 500msec */
 	if (platform->railgate_delay_init)
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index 3a0227a1..a19d0a7c 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -74,7 +74,7 @@ struct gk20a_platform {
 	struct reset_control *reset_control;
 #endif
 	/* valid TPC-MASK */
-	u32 valid_tpc_mask;
+	u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS];
 
 	/* Delay before rail gated */
 	int railgate_delay_init;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
index 41cba0f0..dafa05e5 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -233,7 +233,9 @@ struct gk20a_platform gv11b_tegra_platform = {
 	.can_railgate_init      = true,
 
 	.can_tpc_powergate      = true,
-	.valid_tpc_mask         = 0xc,
+	.valid_tpc_mask[0]      = 0x0,
+	.valid_tpc_mask[1]      = 0x1,
+	.valid_tpc_mask[2]      = 0x5,
 
 	.can_slcg               = true,
 	.can_blcg               = true,
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index 9e48e45d..1ffb6539 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -865,6 +865,18 @@ static ssize_t tpc_pg_mask_read(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask);
 }
 
+/* Return true if tpc_mask equals any entry of g->valid_tpc_mask[]. */
+static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask)
+{
+	u32 idx;
+
+	for (idx = 0; idx < MAX_TPC_PG_CONFIGS; idx++) {
+		if (g->valid_tpc_mask[idx] == tpc_mask)
+			return true;
+	}
+	return false;
+}
+
 static ssize_t tpc_pg_mask_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -896,10 +908,9 @@ static ssize_t tpc_pg_mask_store(struct device *dev,
 		return -ENODEV;
 	}
 
-	if (val == TPC_MASK_FOR_ALL_ACTIVE_TPCs || val == g->valid_tpc_mask) {
+	if (is_tpc_mask_valid(g, (u32)val)) {
 		g->tpc_pg_mask = val;
 	} else {
-
 		nvgpu_err(g, "TPC-PG mask is invalid");
 		nvgpu_mutex_release(&g->tpc_pg_lock);
 		return -EINVAL;
-- 
cgit v1.2.2