From 1c9aaa1eafcf91fbc29404b449f2bec072c804a5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 25 Apr 2014 15:00:54 +0300
Subject: gpu: nvgpu: Implement ELPG flush for gm20b

ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of
the flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gk20a/ltc_common.c   | 34 -------------------------
 drivers/gpu/nvgpu/gk20a/ltc_gk20a.c    | 34 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 +++++++++++++--
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c    | 46 +++++++++++++++++++++++++++++++++-
 4 files changed, 97 insertions(+), 37 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index ac46a9a0..72477983 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
 			    0, max_comptag_lines - 1);
 
 }
-
-/* Flushes the compression bit cache as well as "data".
- * Note: the name here is a bit of a misnomer.  ELPG uses this
- * internally... but ELPG doesn't have to be on to do it manually.
- */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-	u32 data;
-	s32 retry = 100;
-
-	gk20a_dbg_fn("");
-
-	/* Make sure all previous writes are committed to the L2. There's no
-	   guarantee that writes are to DRAM. This will be a sysmembar internal
-	   to the L2. */
-	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
-	do {
-		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-
-		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-			gk20a_dbg_info("g_elpg_flush 0x%x", data);
-			retry--;
-			usleep_range(20, 40);
-		} else
-			break;
-	} while (retry >= 0 || !tegra_platform_is_silicon());
-
-	if (retry < 0)
-		gk20a_warn(dev_from_gk20a(g),
-			    "g_elpg_flush too many retries");
-
-}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index c1ba2aee..9f5317fc 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* Flushes the compression bit cache as well as "data", then polls LTC0 until
+ * the flush is no longer pending.  Note: the name is a bit of a misnomer.
+ * ELPG uses this internally, but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	s32 retry = 100;	/* poll budget; only enforced on silicon */
+
+	gk20a_dbg_fn("");
+
+	/* Make sure all previous writes are committed to the L2. There's no
+	   guarantee that writes are to DRAM. This will be a sysmembar internal
+	   to the L2. The flush is requested through the ltcs register. */
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+	do {
+		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+		/* Still pending: log, charge the budget, back off briefly. */
+		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+			gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+	/* Off silicon the loop ignores the budget; warn if it was exceeded. */
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			    "g_elpg_flush too many retries");
+
+}
+
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index 28c58f50..9840805d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-        return 0x1;
+	return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-        return 0x2;
+	return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
 	return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214; /* LTC1 instance of the g_elpg register */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1; /* extract the 1-bit flush field (bit 0) */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001; /* flush field value meaning "pending" */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1; /* "pending" placed at the flush field's bit position */
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
 	return 0x0014020c;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5da21c64..43c90970 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/*
+ * Start an ELPG flush of the L2 and wait for every LTC to finish it.
+ *
+ * The flush is kicked off once through the broadcast (ltcs) register, but
+ * completion has to be read back from each LTC's own register.  The LTCs
+ * are polled one after another against a single shared retry budget, so no
+ * per-LTC bookkeeping (and no variable-length stack array) is needed.  Off
+ * silicon the budget is ignored and polling continues until completion.
+ */
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+	/* Address distance between consecutive per-LTC g_elpg registers. */
+	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+	s32 retry = 100;
+	u32 data;
+	int i;
+
+	gk20a_dbg_fn("");
+
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+
+	for (i = 0; i < g->ltc_count; i++) {
+		do {
+			data = gk20a_readl(g,
+					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
+			if (!ltc_ltc0_ltss_g_elpg_flush_v(data))
+				break;
+
+			gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			retry--;
+			usleep_range(20, 40);
+		} while (retry >= 0 || !tegra_platform_is_silicon());
+
+		/* Budget exhausted on silicon: give up on the rest too. */
+		if (retry < 0 && tegra_platform_is_silicon())
+			break;
+	}
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			    "g_elpg_flush too many retries");
+}
+
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
 	/* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
 	gops->ltc.init_comptags = gm20b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-	gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
 	gops->ltc.isr = gm20b_ltc_isr;
 }
-- 
cgit v1.2.2