From 78d8f8fe366d521e1acb62a96ca5f0d72e15c8f5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 16 Mar 2015 10:00:32 -0700
Subject: gpu: nvgpu: Cache channel state before dumping

Split the channel debug dump into two phases. In the first phase we just
copy the data to a temporary buffer, and in the second phase we dump the
state from the temporary buffer.

Change-Id: I2578b9fdaaa76f1230df7badbca9fcb5f3854e56
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/717886
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 51 ++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index ace05c07..2c37d22d 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -34,6 +34,11 @@
 unsigned int gk20a_debug_trace_cmdbuf;
 static struct platform_device *gk20a_device;
 
+struct ch_state {	/* per-channel snapshot captured before the dump is printed */
+	int pid;	/* copied from the channel's pid field */
+	u8 inst_block[0];	/* copy of inst_block.cpu_va, ram_in_alloc_size_v() bytes */
+};
+
 static const char * const ccsr_chan_status_str[] = {
 	"idle",
 	"pending",
@@ -97,23 +102,25 @@ void gk20a_debug_output(struct gk20a_debug_output *o,
 
 static void gk20a_debug_show_channel(struct gk20a *g,
 				     struct gk20a_debug_output *o,
-				     struct channel_gk20a *ch)
+				     u32 hw_chid,
+				     struct ch_state *ch_state)
 {
-	u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
+	u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid));
 	u32 status = ccsr_channel_status_v(channel);
 	u32 syncpointa, syncpointb;
 	void *inst_ptr;
 
-	inst_ptr = ch->inst_block.cpu_va;
-	if (!inst_ptr)
+	if (!ch_state)
 		return;
 
+	inst_ptr = &ch_state->inst_block[0];
+
 	syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
 	syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
 
-	gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
-			ch->g->dev->name,
-			ch->pid);
+	gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid,
+			g->dev->name,
+			ch_state->pid);
 	gk20a_debug_output(o, "%s in use %s %s\n",
 			ccsr_channel_enable_v(channel) ? "" : "not",
 			ccsr_chan_status_str[status],
@@ -160,6 +167,8 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
 	u32 chid;
 	int i, err;
 
+	struct ch_state **ch_state;
+
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_debug_output(o, "failed to power on gpu: %d\n", err);
@@ -214,12 +223,34 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
 	}
 	gk20a_debug_output(o, "\n");
 
+	ch_state = kzalloc(sizeof(*ch_state)
+				 * f->num_channels, GFP_KERNEL);
+	if (!ch_state) {
+		gk20a_debug_output(o, "cannot alloc memory for channels\n");
+		goto done;
+	}
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (f->channel[chid].in_use)
+			ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL);
+	}
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) {
+			ch_state[chid]->pid = f->channel[chid].pid;
+			memcpy(&ch_state[chid]->inst_block[0],
+			       f->channel[chid].inst_block.cpu_va,
+			       ram_in_alloc_size_v());
+		}
+	}
 	for (chid = 0; chid < f->num_channels; chid++) {
-		if (f->channel[chid].in_use) {
-			struct channel_gk20a *gpu_ch = &f->channel[chid];
-			gk20a_debug_show_channel(g, o, gpu_ch);
+		if (ch_state[chid]) {
+			gk20a_debug_show_channel(g, o, chid, ch_state[chid]);
+			kfree(ch_state[chid]);
 		}
 	}
+	kfree(ch_state);
+done:
 	gk20a_idle(g->dev);
 }
 
-- 
cgit v1.2.2