gpu: nvgpu: Implement ELPG flush for gm20b

ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of the
flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-04-25 08:00:54 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:09:57 -0400
commit: 1c9aaa1eafcf91fbc29404b449f2bec072c804a5 (patch)
tree: 702f9933600962f05d0d76a9624a67f027b7bea8
parent: 24fc5e36a7f4fe2f36f78c6c91909595964f1645 (diff)
4 files changed, 97 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index ac46a9a0..72477983 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
                            0, max_comptag_lines - 1);
 }
-/* Flushes the compression bit cache as well as "data".
- * Note: the name here is a bit of a misnomer.  ELPG uses this
- * internally... but ELPG doesn't have to be on to do it manually.
- */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-        u32 data;
-        s32 retry = 100;
-        gk20a_dbg_fn("");
-        /* Make sure all previous writes are committed to the L2. There's no
-           guarantee that writes are to DRAM. This will be a sysmembar internal
-           to the L2. */
-        gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-                     ltc_ltcs_ltss_g_elpg_flush_pending_f());
-        do {
-                data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-                if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-                    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-                        gk20a_dbg_info("g_elpg_flush 0x%x", data);
-                        retry--;
-                        usleep_range(20, 40);
-                } else
-                        break;
-        } while (retry >= 0 || !tegra_platform_is_silicon());
-        if (retry < 0)
-                gk20a_warn(dev_from_gk20a(g),
-                            "g_elpg_flush too many retries");
-}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index c1ba2aee..9f5317fc 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
        gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
+/* Flushes the compression bit cache as well as "data".
+ * Note: the name here is a bit of a misnomer.  ELPG uses this
+ * internally... but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+        u32 data;
+        s32 retry = 100;
+        gk20a_dbg_fn("");
+        /* Make sure all previous writes are committed to the L2. There's no
+           guarantee that writes are to DRAM. This will be a sysmembar internal
+           to the L2. */
+        gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+                     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+        do {
+                data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+                if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+                    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+                        gk20a_dbg_info("g_elpg_flush 0x%x", data);
+                        retry--;
+                        usleep_range(20, 40);
+                } else
+                        break;
+        } while (retry >= 0 || !tegra_platform_is_silicon());
+        if (retry < 0)
+                gk20a_warn(dev_from_gk20a(g),
+                            "g_elpg_flush too many retries");
+}
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
        gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index 28c58f50..9840805d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-        return 0x1;
+        return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-        return 0x2;
+        return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
        return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+        return 0x00142214;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+        return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+        return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+        return 0x1;
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
        return 0x0014020c;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5da21c64..43c90970 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
        gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+        u32 data;
+        bool done[g->ltc_count];
+        s32 retry = 100;
+        int i;
+        int num_done = 0;
+        u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+        gk20a_dbg_fn("");
+        for (i = 0; i < g->ltc_count; i++)
+                done[i] = 0;
+        gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+                     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+        do {
+                for (i = 0; i < g->ltc_count; i++) {
+                        if (done[i])
+                                continue;
+                        data = gk20a_readl(g,
+                                        ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
+                        if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
+                                gk20a_dbg_info("g_elpg_flush 0x%x", data);
+                        } else {
+                                done[i] = 1;
+                                num_done++;
+                        }
+                }
+                if (num_done < g->ltc_count) {
+                        retry--;
+                        usleep_range(20, 40);
+                } else
+                        break;
+        } while (retry >= 0 || !tegra_platform_is_silicon());
+        if (retry < 0)
+                gk20a_warn(dev_from_gk20a(g),
+                            "g_elpg_flush too many retries");
+}
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
        /* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
        gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
        gops->ltc.init_comptags = gm20b_ltc_init_comptags;
        gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-        gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+        gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
        gops->ltc.isr = gm20b_ltc_isr;
 }
author	Terje Bergstrom <tbergstrom@nvidia.com>	2014-04-25 08:00:54 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:09:57 -0400
commit	1c9aaa1eafcf91fbc29404b449f2bec072c804a5 (patch)
tree	702f9933600962f05d0d76a9624a67f027b7bea8
parent	24fc5e36a7f4fe2f36f78c6c91909595964f1645 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index ac46a9a0..72477983 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
313	0, max_comptag_lines - 1);	313	0, max_comptag_lines - 1);
314		314
315	}	315	}
316
317	/* Flushes the compression bit cache as well as "data".
318	* Note: the name here is a bit of a misnomer. ELPG uses this
319	* internally... but ELPG doesn't have to be on to do it manually.
320	*/
321	static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
322	{
323	u32 data;
324	s32 retry = 100;
325
326	gk20a_dbg_fn("");
327
328	/* Make sure all previous writes are committed to the L2. There's no
329	guarantee that writes are to DRAM. This will be a sysmembar internal
330	to the L2. */
331	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
332	ltc_ltcs_ltss_g_elpg_flush_pending_f());
333	do {
334	data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
335
336	if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
337	ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
338	gk20a_dbg_info("g_elpg_flush 0x%x", data);
339	retry--;
340	usleep_range(20, 40);
341	} else
342	break;
343	} while (retry >= 0 || !tegra_platform_is_silicon());
344
345	if (retry < 0)
346	gk20a_warn(dev_from_gk20a(g),
347	"g_elpg_flush too many retries");
348
349	}


diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index c1ba2aee..9f5317fc 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
212	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);	212	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
213	}	213	}
214		214
		215	/* Flushes the compression bit cache as well as "data".
		216	* Note: the name here is a bit of a misnomer. ELPG uses this
		217	* internally... but ELPG doesn't have to be on to do it manually.
		218	*/
		219	static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
		220	{
		221	u32 data;
		222	s32 retry = 100;
		223
		224	gk20a_dbg_fn("");
		225
		226	/* Make sure all previous writes are committed to the L2. There's no
		227	guarantee that writes are to DRAM. This will be a sysmembar internal
		228	to the L2. */
		229	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		230	ltc_ltcs_ltss_g_elpg_flush_pending_f());
		231	do {
		232	data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
		233
		234	if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
		235	ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
		236	gk20a_dbg_info("g_elpg_flush 0x%x", data);
		237	retry--;
		238	usleep_range(20, 40);
		239	} else
		240	break;
		241	} while (retry >= 0 || !tegra_platform_is_silicon());
		242
		243	if (retry < 0)
		244	gk20a_warn(dev_from_gk20a(g),
		245	"g_elpg_flush too many retries");
		246
		247	}
		248
215	void gk20a_init_ltc(struct gpu_ops *gops)	249	void gk20a_init_ltc(struct gpu_ops *gops)
216	{	250	{
217	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;	251	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;


diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index 28c58f50..9840805d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
96	}	96	}
97	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)	97	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
98	{	98	{
99	return 0x1;	99	return 0x1;
100	}	100	}
101	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)	101	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
102	{	102	{
103	return 0x2;	103	return 0x2;
104	}	104	}
105	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)	105	static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
106	{	106	{
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
258	{	258	{
259	return 0x1;	259	return 0x1;
260	}	260	}
		261	static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
		262	{
		263	return 0x00142214;
		264	}
		265	static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
		266	{
		267	return (r >> 0) & 0x1;
		268	}
		269	static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
		270	{
		271	return 0x00000001;
		272	}
		273	static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
		274	{
		275	return 0x1;
		276	}
261	static inline u32 ltc_ltc0_ltss_intr_r(void)	277	static inline u32 ltc_ltc0_ltss_intr_r(void)
262	{	278	{
263	return 0x0014020c;	279	return 0x0014020c;


diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5da21c64..43c90970 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
193	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);	193	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
194	}	194	}
195		195
		196	static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
		197	{
		198	u32 data;
		199	bool done[g->ltc_count];
		200	s32 retry = 100;
		201	int i;
		202	int num_done = 0;
		203	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
		204
		205	gk20a_dbg_fn("");
		206
		207	for (i = 0; i < g->ltc_count; i++)
		208	done[i] = 0;
		209
		210	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		211	ltc_ltcs_ltss_g_elpg_flush_pending_f());
		212	do {
		213	for (i = 0; i < g->ltc_count; i++) {
		214	if (done[i])
		215	continue;
		216
		217	data = gk20a_readl(g,
		218	ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
		219
		220	if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
		221	gk20a_dbg_info("g_elpg_flush 0x%x", data);
		222	} else {
		223	done[i] = 1;
		224	num_done++;
		225	}
		226	}
		227
		228	if (num_done < g->ltc_count) {
		229	retry--;
		230	usleep_range(20, 40);
		231	} else
		232	break;
		233	} while (retry >= 0 || !tegra_platform_is_silicon());
		234
		235	if (retry < 0)
		236	gk20a_warn(dev_from_gk20a(g),
		237	"g_elpg_flush too many retries");
		238	}
		239
196	void gm20b_init_ltc(struct gpu_ops *gops)	240	void gm20b_init_ltc(struct gpu_ops *gops)
197	{	241	{
198	/* Gk20a reused ops. */	242	/* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
209	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;	253	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
210	gops->ltc.init_comptags = gm20b_ltc_init_comptags;	254	gops->ltc.init_comptags = gm20b_ltc_init_comptags;
211	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;	255	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
212	gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;	256	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
213	gops->ltc.isr = gm20b_ltc_isr;	257	gops->ltc.isr = gm20b_ltc_isr;
214	}	258	}