From 86225cb04eb040a83400d5ad6619b3c2318a53f8 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Thu, 2 Jun 2016 17:17:14 -0700
Subject: gpu: nvgpu: add read_ptimer to gops

Move all places that read ptimer to use the callback.
This prepares for adding a vgpu implementation of read ptimer.

Bug 1395833

Change-Id: Ia339f2f08d75ca4969a443fffc9a61cff1d3d2b7
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1159587
(cherry picked from commit a01f804684f875c9cffc31eb2c1038f2f29ec66f)
Reviewed-on: http://git-master/r/1158449
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c        | 43 ++++-----------------------
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c |  4 +--
 drivers/gpu/nvgpu/gk20a/gk20a.c             | 45 +++++++++++++++++++++++------
 drivers/gpu/nvgpu/gk20a/gk20a.h             |  3 +-
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c         |  1 +
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c         |  1 +
 6 files changed, 47 insertions(+), 50 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 39581eb2..3e34b6b8 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -626,50 +626,17 @@ static int nvgpu_gpu_get_gpu_time(
 	struct gk20a *g,
 	struct nvgpu_gpu_get_gpu_time_args *args)
 {
-	int err = 0;
-	const unsigned int max_iterations = 3;
-	unsigned int i = 0;
-	u32 gpu_timestamp_hi_prev = 0;
+	u64 time;
+	int err;
 
 	err = gk20a_busy(g->dev);
 	if (err)
 		return err;
 
-	/* Note. The GPU nanosecond timer consists of two 32-bit
-	 * registers (high & low). To detect a possible low register
-	 * wrap-around between the reads, we need to read the high
-	 * register before and after low. The wraparound happens
-	 * approximately once per 4 secs. */
+	err = g->ops.read_ptimer(g, &time);
+	if (!err)
+		args->gpu_timestamp = time;
 
-	/* get initial gpu_timestamp_hi value */
-	gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
-
-	for (i = 0; i < max_iterations; ++i) {
-		u32 gpu_timestamp_hi = 0;
-		u32 gpu_timestamp_lo = 0;
-
-		rmb(); /* maintain read order */
-		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
-		rmb(); /* maintain read order */
-		gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
-
-		if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
-			args->gpu_timestamp =
-				(((u64)gpu_timestamp_hi) << 32) |
-				gpu_timestamp_lo;
-			goto clean_up;
-		}
-
-		/* wrap-around detected, retry */
-		gpu_timestamp_hi_prev = gpu_timestamp_hi;
-	}
-
-	/* too many iterations, bail out */
-	gk20a_err(dev_from_gk20a(g),
-		  "Failed to read GPU time. Clock or bus unstable?\n");
-	err = -EBUSY;
-
-clean_up:
 	gk20a_idle(g->dev);
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 3682d904..aadbf7af 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -656,7 +656,6 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
 	struct nvgpu_ctxsw_trace_entry entry = {
 		.vmid = 0,
 		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
-		.timestamp = gk20a_read_ptimer(g),
 		.context_id = 0,
 		.pid = ch->pid,
 	};
@@ -664,6 +663,7 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
 	if (!g->ctxsw_trace)
 		return;
 
+	g->ops.read_ptimer(g, &entry.timestamp);
 	gk20a_ctxsw_trace_write(g, &entry);
 	gk20a_ctxsw_trace_wake_up(g, 0);
 #endif
@@ -676,7 +676,6 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
 	struct nvgpu_ctxsw_trace_entry entry = {
 		.vmid = 0,
 		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
-		.timestamp = gk20a_read_ptimer(g),
 		.context_id = 0,
 		.pid = 0,
 	};
@@ -685,6 +684,7 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
 	if (!g->ctxsw_trace)
 		return;
 
+	g->ops.read_ptimer(g, &entry.timestamp);
 	mutex_lock(&tsg->ch_list_lock);
 	if (!list_empty(&tsg->ch_list)) {
 		ch = list_entry(tsg->ch_list.next,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index d21d0527..db77d40f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2227,18 +2227,45 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name)
 	return fw;
 }
 
-
-u64 gk20a_read_ptimer(struct gk20a *g)
+int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 {
-	u32 time_hi0 = gk20a_readl(g, timer_time_1_r());
-	u32 time_lo = gk20a_readl(g, timer_time_0_r());
-	u32 time_hi1 = gk20a_readl(g, timer_time_1_r());
-	u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1;
-	u64 time = ((u64)time_hi << 32) | time_lo;
+	const unsigned int max_iterations = 3;
+	unsigned int i = 0;
+	u32 gpu_timestamp_hi_prev = 0;
 
-	return time;
-}
+	if (!value)
+		return -EINVAL;
+
+	/* Note. The GPU nanosecond timer consists of two 32-bit
+	 * registers (high & low). To detect a possible low register
+	 * wrap-around between the reads, we need to read the high
+	 * register before and after low. The wraparound happens
+	 * approximately once per 4 secs. */
+
+	/* get initial gpu_timestamp_hi value */
+	gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
+
+	for (i = 0; i < max_iterations; ++i) {
+		u32 gpu_timestamp_hi = 0;
+		u32 gpu_timestamp_lo = 0;
+
+		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
+		gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
 
+		if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
+			*value = (((u64)gpu_timestamp_hi) << 32) |
+				gpu_timestamp_lo;
+			return 0;
+		}
+
+		/* wrap-around detected, retry */
+		gpu_timestamp_hi_prev = gpu_timestamp_hi;
+	}
+
+	/* too many iterations, bail out */
+	gk20a_err(dev_from_gk20a(g), "failed to read ptimer");
+	return -EBUSY;
+}
 
 MODULE_LICENSE("GPL v2");
 module_init(gk20a_init);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4ede8982..e639ace4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -620,6 +620,7 @@ struct gpu_ops {
 
 	int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value);
 	int (*chip_init_gpu_characteristics)(struct gk20a *g);
+	int (*read_ptimer)(struct gk20a *g, u64 *value);
 
 	struct {
 		int (*init)(struct gk20a *g);
@@ -1111,7 +1112,7 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
 		return (timeout * 10) / scale10x;
 }
 
-u64 gk20a_read_ptimer(struct gk20a *g);
+int gk20a_read_ptimer(struct gk20a *g, u64 *value);
 extern struct class nvgpu_class;
 
 #define INTERFACE_NAME "nvhost%s-gpu"
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index bf58d298..99c55992 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -150,6 +150,7 @@ int gk20a_init_hal(struct gk20a *g)
 	gops->name = "gk20a";
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->get_litter_value = gk20a_get_litter_value;
+	gops->read_ptimer = gk20a_read_ptimer;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = KEPLER_C;
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index cbd3f50b..4f2fffc8 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -218,6 +218,7 @@ int gm20b_init_hal(struct gk20a *g)
 	gops->name = "gm20b";
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->get_litter_value = gm20b_get_litter_value;
+	gops->read_ptimer = gk20a_read_ptimer;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = MAXWELL_B;
-- 
cgit v1.2.2