From ced17a2d31a49b771b0cc9715f15330a408f8e69 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 23 Feb 2015 13:42:03 -0800
Subject: gpu: nvgpu: Use busy looping for flush operations

Use busy looping for l2 tag flush and elpg flush
operations. This is making total flash time more
accurate and reduced overall time compared with
usleep. Also added trace points to measure
performance for these operations.

Also corrected timeout error check for non-silicon
platforms.

Bug 200081799

Change-Id: I63410bb7528db9258501633996fbdee5fdec1c74
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/710472
(cherry picked from commit 18684cf9d5d6870a1a1fd5711c4fc2d733caad20)
Reviewed-on: http://git-master/r/710986
GVS: Gerrit_Virtual_Submit
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
---
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')

diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 0a0efe41..522cd1dc 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B L2
  *
- * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015 NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/jiffies.h>
+#include <trace/events/gk20a.h>
 
 #include "hw_mc_gm20b.h"
 #include "hw_ltc_gm20b.h"
@@ -26,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/gk20a_allocator.h"
 
+
 static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 {
 	/* max memory size (MB) to cover */
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
 	u32 ltc, slice, ctrl1, val, hw_op = 0;
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	s32 retry = 200;
 	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
 				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
+
 	if (gr->compbit_store.size == 0)
 		return 0;
 
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		for (slice = 0; slice < slices_per_ltc; slice++) {
 
-			delay = GR_IDLE_CHECK_DEFAULT;
-
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
 				ltc * proj_ltc_stride_v() +
 				slice * proj_lts_stride_v();
 
+			retry = 200;
 			do {
 				val = gk20a_readl(g, ctrl1);
 				if (!(val & hw_op))
 					break;
+				retry--;
+				udelay(5);
 
-				usleep_range(delay, delay * 2);
-				delay = min_t(u32, delay << 1,
-					GR_IDLE_CHECK_MAX);
-
-			} while (time_before(jiffies, end_jiffies) |
+			} while (retry >= 0 ||
 					!tegra_platform_is_silicon());
 
-			if (!time_before(jiffies, end_jiffies)) {
+			if (retry < 0 && tegra_platform_is_silicon()) {
 				gk20a_err(dev_from_gk20a(g),
 					   "comp tag clear timeout\n");
 				err = -EBUSY;
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		}
 	}
 out:
+	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
 	mutex_unlock(&g->mm.l2_op_lock);
 	return 0;
 }
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
+
 	for (i = 0; i < g->ltc_count; i++)
 		done[i] = 0;
 
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 
 		if (num_done < g->ltc_count) {
 			retry--;
-			usleep_range(20, 40);
+			udelay(5);
 		} else
 			break;
 	} while (retry >= 0 || !tegra_platform_is_silicon());
 
-	if (retry < 0)
+	if (retry < 0 && tegra_platform_is_silicon())
 		gk20a_warn(dev_from_gk20a(g),
 			    "g_elpg_flush too many retries");
+
+	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
 }
 
 u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
-- 
cgit v1.2.2