From d3b8415948de8c9ffe2f2fa66340dd7e71a894e6 Mon Sep 17 00:00:00 2001
From: Deepak Goyal <dgoyal@nvidia.com>
Date: Mon, 16 Jul 2018 11:10:23 +0530
Subject: gpu: nvgpu: tpc powergating through sysfs

- Adds static TPC power-gating through sysfs.
- The active TPC count stays in effect until the GPU/system is booted again.
- tpc_pg_mask can be written only after GPU probe finishes and before
  GPU boot initializes the golden context image size.

Note:
To use this feature, the boot/init scripts of the OS (used with the nvgpu
driver) must be changed to write to the sysfs node before the discover
image size query is posted to FECS.

Bug 200406784

Change-Id: Id749c7a617422c625f77d0c1a9aada2eb960c4d0
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1742422
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.c                    | 11 ++++
 drivers/gpu/nvgpu/gk20a/gk20a.h                    |  8 +++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c                 | 40 +++++++++++++++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h                 |  1 +
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c                |  1 +
 .../gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 36 +++++++++++++
 drivers/gpu/nvgpu/os/linux/driver_common.c         |  3 ++
 drivers/gpu/nvgpu/os/linux/platform_gk20a.h        |  5 ++
 drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c  |  3 ++
 drivers/gpu/nvgpu/os/linux/sysfs.c                 | 59 ++++++++++++++++++++++
 10 files changed, 167 insertions(+)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 08e8b79f..ed48253f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -274,12 +274,23 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 	}
 
+	nvgpu_mutex_acquire(&g->tpc_pg_lock);
+
+	if (g->can_tpc_powergate) {
+		if (g->ops.gr.powergate_tpc != NULL) {
+			g->ops.gr.powergate_tpc(g);
+		}
+	}
+
 	err = gk20a_init_gr_support(g);
 	if (err) {
 		nvgpu_err(g, "failed to init gk20a gr");
+		nvgpu_mutex_release(&g->tpc_pg_lock);
 		goto done;
 	}
 
+	nvgpu_mutex_release(&g->tpc_pg_lock);
+
 	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
 		err = gk20a_init_pstate_pmu_support(g);
 		if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 948d8e60..4934958c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -307,6 +307,7 @@ struct gpu_ops {
 			  u32 class, u32 padding);
 		void (*free_gr_ctx)(struct gk20a *g,
 				    struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+		void (*powergate_tpc)(struct gk20a *g);
 		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
 				struct channel_gk20a *c,
 				struct nvgpu_mem *mem);
@@ -1361,6 +1362,8 @@ struct gk20a {
 	u64 log_mask;
 	u32 log_trace;
 
+	struct nvgpu_mutex tpc_pg_lock;
+
 	struct nvgpu_gpu_params params;
 
 	/*
@@ -1532,6 +1535,11 @@ struct gk20a {
 
 	u32 tpc_fs_mask_user;
 
+	u32 tpc_pg_mask;
+	bool can_tpc_powergate;
+
+	u32 valid_tpc_mask;
+
 	struct nvgpu_bios bios;
 	bool bios_is_init;
 
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index c2f47a20..51588f1f 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -58,6 +58,7 @@
 #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
+#include <nvgpu/hw/gv11b/hw_fuse_gv11b.h>
 
 #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
 
@@ -71,6 +72,16 @@
  */
 #define GR_TPCS_INFO_FOR_MAPREGISTER 6
 
+/*
+ * There are 4 TPCs in GV11b ranging from TPC0 to TPC3
+ * There are two PES in GV11b each controlling two TPCs
+ * PES0 is linked to TPC0 & TPC2 (bits 0 and 2), PES1 to TPC1 & TPC3
+ * (bits 1 and 3). Replacement lists are parenthesized for safe expansion.
+ */
+#define TPC_MASK_FOR_PESID_0   ((u32) 0x5)
+#define TPC_MASK_FOR_PESID_1   ((u32) 0xa)
+
+
 bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
@@ -117,6 +128,35 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+/*
+ * Program the static TPC power-gating mask (g->tpc_pg_mask) into the TPC
+ * fuse control register and hand the resulting active-TPC mask to each PES.
+ * NOTE(review): the status poll has no timeout - confirm HW always settles.
+ */
+void gr_gv11b_powergate_tpc(struct gk20a *g)
+{
+	u32 active_tpcs = ~g->tpc_pg_mask & (u32) 0xf;
+	u32 pes_req = gr_fe_tpc_pesmask_req_send_f() |
+			gr_fe_tpc_pesmask_action_write_f() |
+			gr_fe_tpc_pesmask_gpcid_f(0);
+
+	if (gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0)) == g->tpc_pg_mask) {
+		nvgpu_info(g, "TPC-PG mask and TPC-PG status is same");
+		return;
+	}
+
+	gk20a_writel(g, fuse_ctrl_opt_tpc_gpc_r(0), g->tpc_pg_mask);
+
+	/* Spin until the fuse status register reports the requested mask. */
+	while (gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0)) != g->tpc_pg_mask) {
+	}
+
+	gk20a_writel(g, gr_fe_tpc_pesmask_r(), pes_req |
+		gr_fe_tpc_pesmask_pesid_f(0) | (active_tpcs & TPC_MASK_FOR_PESID_0));
+	gk20a_writel(g, gr_fe_tpc_pesmask_r(), pes_req |
+		gr_fe_tpc_pesmask_pesid_f(1) | (active_tpcs & TPC_MASK_FOR_PESID_1));
+}
+
 bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index a8dbd3a8..f799ccfe 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -250,4 +250,5 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 	u32 *priv_addr_table,
 	u32 *num_registers);
 u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
+void gr_gv11b_powergate_tpc(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index d479fef8..08c3097e 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -294,6 +294,7 @@ static const struct gpu_ops gv11b_ops = {
 		.init_ctx_state = gr_gp10b_init_ctx_state,
 		.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
 		.free_gr_ctx = gr_gk20a_free_gr_ctx,
+		.powergate_tpc = gr_gv11b_powergate_tpc,
 		.update_ctxsw_preemption_mode =
 			gr_gv11b_update_ctxsw_preemption_mode,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 90994a53..473eaff4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -1572,6 +1572,42 @@ static inline u32 gr_fe_tpc_fs_r(u32 i)
 {
 	return 0x0040a200U + i*4U;
 }
+static inline u32 gr_fe_tpc_pesmask_r(void)
+{
+	return 0x0040a260U; /* register offset */
+}
+static inline u32 gr_fe_tpc_pesmask_pesid_f(u32 v)
+{
+	return (v & 0x3fU) << 24U; /* 6-bit PESID field, bits 29:24 */
+}
+static inline u32 gr_fe_tpc_pesmask_gpcid_f(u32 v)
+{
+	return (v & 0xffU) << 16U; /* 8-bit GPCID field, bits 23:16 */
+}
+static inline u32 gr_fe_tpc_pesmask_action_m(void)
+{
+	return 0x1U << 30U; /* ACTION field mask, bit 30 */
+}
+static inline u32 gr_fe_tpc_pesmask_action_write_f(void)
+{
+	return 0x40000000U; /* ACTION = write: bit 30 set */
+}
+static inline u32 gr_fe_tpc_pesmask_action_read_f(void)
+{
+	return 0x0U; /* ACTION = read: bit 30 clear */
+}
+static inline u32 gr_fe_tpc_pesmask_req_m(void)
+{
+	return 0x1U << 31U; /* REQ field mask, bit 31 */
+}
+static inline u32 gr_fe_tpc_pesmask_req_send_f(void)
+{
+	return 0x80000000U; /* REQ = send: bit 31 set */
+}
+static inline u32 gr_fe_tpc_pesmask_mask_m(void)
+{
+	return 0xffffU << 0U; /* MASK field mask, bits 15:0 */
+}
 static inline u32 gr_pri_mme_shadow_raw_index_r(void)
 {
 	return 0x00404488U;
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index 8a7cf552..c651e394 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -64,6 +64,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->poweron_lock);
 	nvgpu_mutex_init(&g->poweroff_lock);
 	nvgpu_mutex_init(&g->ctxsw_disable_lock);
+	nvgpu_mutex_init(&g->tpc_pg_lock);
 
 	l->regs_saved = l->regs;
 	l->bar1_saved = l->bar1;
@@ -168,6 +169,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
 	g->ptimer_src_freq = platform->ptimer_src_freq;
 	g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
 	__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
+	g->can_tpc_powergate = platform->can_tpc_powergate;
+	g->valid_tpc_mask = platform->valid_tpc_mask;
 	g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
 	/* if default delay is not set, set default delay to 500msec */
 	if (platform->railgate_delay_init)
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index d9725e4c..a4c3eca3 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -43,6 +43,9 @@ struct gk20a_platform {
 	/* Should be populated at probe. */
 	bool can_railgate_init;
 
+	/* Should be populated at probe. */
+	bool can_tpc_powergate;
+
 	/* Should be populated at probe. */
 	bool can_elpg_init;
 
@@ -71,6 +74,8 @@ struct gk20a_platform {
 	/* Reset control for device */
 	struct reset_control *reset_control;
 #endif
+	/* valid TPC-MASK */
+	u32 valid_tpc_mask;
 
 	/* Delay before rail gated */
 	int railgate_delay_init;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
index c9c13197..edb3263c 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -209,6 +209,9 @@ struct gk20a_platform gv11b_tegra_platform = {
 	.railgate_delay_init    = 500,
 	.can_railgate_init      = true,
 
+	.can_tpc_powergate      = true,
+	.valid_tpc_mask         = 0xc,
+
 	.can_slcg               = true,
 	.can_blcg               = true,
 	.can_elcg               = true,
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index 1b84e2e2..b607768a 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -31,6 +31,8 @@
 
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
+#define TPC_MASK_FOR_ALL_ACTIVE_TPCs           (u32) 0x0
+
 static ssize_t elcg_enable_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -843,6 +845,61 @@ static ssize_t force_idle_read(struct device *dev,
 static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
 #endif
 
+/* sysfs show handler: print g->tpc_pg_mask as unsigned decimal + newline. */
+static ssize_t tpc_pg_mask_read(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	/* tpc_pg_mask is u32, so format with %u rather than %d. */
+	return snprintf(buf, PAGE_SIZE, "%u\n", get_gk20a(dev)->tpc_pg_mask);
+}
+
+/*
+ * sysfs store handler for tpc_pg_mask.
+ *
+ * Parses a decimal mask from buf and records it in g->tpc_pg_mask. Only 0
+ * (all TPCs active) or the platform's valid_tpc_mask is accepted, and only
+ * while the golden context image size is still unset. The write is a no-op
+ * (still reported as consumed) when the platform has no TPC-PG support or
+ * the mask is unchanged.
+ *
+ * Returns count on success/no-op, -EINVAL for an unparsable or unsupported
+ * mask, -ENODEV once the golden image size has been initialized.
+ */
+static ssize_t tpc_pg_mask_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long mask = 0;
+	ssize_t ret = count;
+
+	nvgpu_mutex_acquire(&g->tpc_pg_lock);
+
+	/* Each branch either reports why nothing changes or commits the mask. */
+	if (!g->can_tpc_powergate) {
+		nvgpu_info(g, "TPC-PG not enabled for the platform");
+	} else if (kstrtoul(buf, 10, &mask) < 0) {
+		nvgpu_err(g, "invalid value");
+		ret = -EINVAL;
+	} else if (mask == g->tpc_pg_mask) {
+		nvgpu_info(g, "no value change, same mask already set");
+	} else if (g->gr.ctx_vars.golden_image_size) {
+		nvgpu_err(g, "golden image size already initialized");
+		ret = -ENODEV;
+	} else if (mask != TPC_MASK_FOR_ALL_ACTIVE_TPCs &&
+			mask != g->valid_tpc_mask) {
+		nvgpu_err(g, "TPC-PG mask is invalid");
+		ret = -EINVAL;
+	} else {
+		g->tpc_pg_mask = mask;
+	}
+
+	nvgpu_mutex_release(&g->tpc_pg_lock);
+
+	return ret;
+}
+
+static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
+
 static ssize_t tpc_fs_mask_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -1130,6 +1187,7 @@ void nvgpu_remove_sysfs(struct device *dev)
 	device_remove_file(dev, &dev_attr_aelpg_enable);
 	device_remove_file(dev, &dev_attr_allow_all);
 	device_remove_file(dev, &dev_attr_tpc_fs_mask);
+	device_remove_file(dev, &dev_attr_tpc_pg_mask);
 	device_remove_file(dev, &dev_attr_min_timeslice_us);
 	device_remove_file(dev, &dev_attr_max_timeslice_us);
 
@@ -1181,6 +1239,7 @@ int nvgpu_create_sysfs(struct device *dev)
 	error |= device_create_file(dev, &dev_attr_aelpg_enable);
 	error |= device_create_file(dev, &dev_attr_allow_all);
 	error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
+	error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
 	error |= device_create_file(dev, &dev_attr_min_timeslice_us);
 	error |= device_create_file(dev, &dev_attr_max_timeslice_us);
 
-- 
cgit v1.2.2