From fba43012c092b7566a1e83d140e0c5f170de62f0 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Wed, 24 Aug 2016 15:12:35 -0700
Subject: gpu: nvgpu: do not flush FECS record on engine reset

The FECS method that flushes the timestamp record can fail when
FECS is not processing the main method queue. In particular, this
occurs on a ctxsw timeout, where we process fifo sched interrupts
from the host, but FECS is still waiting for idle (grWFI).
In such a scenario, the flush adds a huge delay to the fifo recovery
procedure (timeout on the FECS method). Since flushing the last
(incomplete) record from FECS would only be useful in that case
(context switch ongoing), remove the flush operation on engine
reset. Note that an explicit ENGINE_RESET event (with pid)
is inserted in the user-facing ctxsw buffer on engine reset.

Bug 200228310

Change-Id: I885525f8f197f81266b50db161bb511867fc74f4
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1207305
(cherry picked from commit 44391b6204fd648949295f90481b0c424d9a5ddf)
Reviewed-on: http://git-master/r/1208414
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 22 +++++++++++++++++-----
 drivers/gpu/nvgpu/gk20a/gk20a.h            |  1 +
 drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c   | 13 +++++++++++++
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index abf1cc55..4bfbf503 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -651,9 +651,11 @@ static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 			"ch=%p context_ptr=%x", ch, context_ptr);
 
-	if (g->ops.fecs_trace.flush)
-		g->ops.fecs_trace.flush(g);
-	gk20a_fecs_trace_poll(g);
+	if (g->ops.fecs_trace.is_enabled(g)) {
+		if (g->ops.fecs_trace.flush)
+			g->ops.fecs_trace.flush(g);
+		gk20a_fecs_trace_poll(g);
+	}
 	gk20a_fecs_trace_hash_del(g, context_ptr);
 	return 0;
 }
@@ -662,8 +664,9 @@ static int gk20a_fecs_trace_reset(struct gk20a *g)
 {
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
 
-	if (g->ops.fecs_trace.flush)
-		g->ops.fecs_trace.flush(g);
+	if (!g->ops.fecs_trace.is_enabled(g))
+		return 0;
+
 	gk20a_fecs_trace_poll(g);
 	return gk20a_fecs_trace_set_read_index(g, 0);
 }
@@ -725,6 +728,14 @@ static int gk20a_fecs_trace_disable(struct gk20a *g)
 	return -EPERM;
 }
 
+static bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
+{
+	struct gk20a_fecs_trace *trace = g->fecs_trace;
+
+	return (trace && trace->poll_task);
+}
+
+
 void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
 {
 	gk20a_ctxsw_trace_init_ops(ops);
@@ -732,6 +743,7 @@ void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
 	ops->fecs_trace.deinit = gk20a_fecs_trace_deinit;
 	ops->fecs_trace.enable = gk20a_fecs_trace_enable;
 	ops->fecs_trace.disable = gk20a_fecs_trace_disable;
+	ops->fecs_trace.is_enabled = gk20a_fecs_trace_is_enabled;
 	ops->fecs_trace.reset = gk20a_fecs_trace_reset;
 	ops->fecs_trace.flush = NULL;
 	ops->fecs_trace.poll = gk20a_fecs_trace_poll;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 463317e3..2b348677 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -484,6 +484,7 @@ struct gpu_ops {
 		int (*poll)(struct gk20a *g);
 		int (*enable)(struct gk20a *g);
 		int (*disable)(struct gk20a *g);
+		bool (*is_enabled)(struct gk20a *g);
 		int (*reset)(struct gk20a *g);
 		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
 		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
index 634932b7..c80da26d 100644
--- a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
@@ -26,6 +26,7 @@ struct vgpu_fecs_trace {
 	struct nvgpu_ctxsw_ring_header *header;
 	struct nvgpu_ctxsw_trace_entry *entries;
 	int num_entries;
+	bool enabled;
 	void *buf;
 };
 
@@ -104,6 +105,7 @@ static int vgpu_fecs_trace_deinit(struct gk20a *g)
 
 static int vgpu_fecs_trace_enable(struct gk20a *g)
 {
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
 	struct tegra_vgpu_cmd_msg msg = {
 		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE,
 		.handle = vgpu_get_handle(g),
@@ -113,23 +115,33 @@ static int vgpu_fecs_trace_enable(struct gk20a *g)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	err = err ? err : msg.ret;
 	WARN_ON(err);
+	vcst->enabled = !err;
 	return err;
 }
 
 static int vgpu_fecs_trace_disable(struct gk20a *g)
 {
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
 	struct tegra_vgpu_cmd_msg msg = {
 		.cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE,
 		.handle = vgpu_get_handle(g),
 	};
 	int err;
 
+	vcst->enabled = false;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	err = err ? err : msg.ret;
 	WARN_ON(err);
 	return err;
 }
 
+static bool vpgpu_fecs_trace_is_enabled(struct gk20a *g)
+{
+	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;
+
+	return (vcst && vcst->enabled);
+}
+
 static int vgpu_fecs_trace_poll(struct gk20a *g)
 {
 	struct tegra_vgpu_cmd_msg msg = {
@@ -208,6 +220,7 @@ void vgpu_init_fecs_trace_ops(struct gpu_ops *ops)
 	ops->fecs_trace.deinit = vgpu_fecs_trace_deinit;
 	ops->fecs_trace.enable = vgpu_fecs_trace_enable;
 	ops->fecs_trace.disable = vgpu_fecs_trace_disable;
+	ops->fecs_trace.is_enabled = vpgpu_fecs_trace_is_enabled;
 	ops->fecs_trace.reset = NULL;
 	ops->fecs_trace.flush = NULL;
 	ops->fecs_trace.poll = vgpu_fecs_trace_poll;
-- 
cgit v1.2.2