gpu: nvgpu: Cache channel state before dumping

Split the channel debug dump into two phases. In the first phase we just copy the data to a temporary buffer, and in the second phase we dump the state from the temporary buffer.

Change-Id: I2578b9fdaaa76f1230df7badbca9fcb5f3854e56
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/717886
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
author: Terje Bergstrom <tbergstrom@nvidia.com> 2015-03-16 13:00:32 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-04-04 22:00:44 -0400
commit: 78d8f8fe366d521e1acb62a96ca5f0d72e15c8f5 (patch)
tree: e061e1aa42b10bd0bb3b1293770626106edea765 /drivers/gpu/nvgpu/gk20a
parent: 1eded552869f6957bec7695554752e26391daaee (diff)
1 files changed, 41 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index ace05c07..2c37d22d 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -34,6 +34,11 @@
 unsigned int gk20a_debug_trace_cmdbuf;
 static struct platform_device *gk20a_device;
+struct ch_state {
+        int pid;
+        u8 inst_block[0];
+};
 static const char * const ccsr_chan_status_str[] = {
        "idle",
        "pending",
@@ -97,23 +102,25 @@ void gk20a_debug_output(struct gk20a_debug_output *o,
 static void gk20a_debug_show_channel(struct gk20a *g,
                                     struct gk20a_debug_output *o,
-                                     struct channel_gk20a *ch)
+                                     u32 hw_chid,
+                                     struct ch_state *ch_state)
 {
-        u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
+        u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid));
        u32 status = ccsr_channel_status_v(channel);
        u32 syncpointa, syncpointb;
        void *inst_ptr;
-        inst_ptr = ch->inst_block.cpu_va;
+        if (!ch_state)
-        if (!inst_ptr)
                return;
+        inst_ptr = &ch_state->inst_block[0];
        syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
        syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
-        gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
+        gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid,
-                        ch->g->dev->name,
+                        g->dev->name,
-                        ch->pid);
+                        ch_state->pid);
        gk20a_debug_output(o, "%s in use %s %s\n",
                        ccsr_channel_enable_v(channel) ? "" : "not",
                        ccsr_chan_status_str[status],
@@ -160,6 +167,8 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
        u32 chid;
        int i, err;
+        struct ch_state **ch_state;
        err = gk20a_busy(g->dev);
        if (err) {
                gk20a_debug_output(o, "failed to power on gpu: %d\n", err);
@@ -214,12 +223,34 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
        }
        gk20a_debug_output(o, "\n");
+        ch_state = kzalloc(sizeof(*ch_state)
+                                 * f->num_channels, GFP_KERNEL);
+        if (!ch_state) {
+                gk20a_debug_output(o, "cannot alloc memory for channels\n");
+                goto done;
+        }
+        for (chid = 0; chid < f->num_channels; chid++) {
+                if (f->channel[chid].in_use)
+                        ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL);
+        }
+        for (chid = 0; chid < f->num_channels; chid++) {
+                if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) {
+                        ch_state[chid]->pid = f->channel[chid].pid;
+                        memcpy(&ch_state[chid]->inst_block[0],
+                               f->channel[chid].inst_block.cpu_va,
+                               ram_in_alloc_size_v());
+                }
+        }
        for (chid = 0; chid < f->num_channels; chid++) {
-                if (f->channel[chid].in_use) {
+                if (ch_state[chid]) {
-                        struct channel_gk20a *gpu_ch = &f->channel[chid];
+                        gk20a_debug_show_channel(g, o, chid, ch_state[chid]);
-                        gk20a_debug_show_channel(g, o, gpu_ch);
+                        kfree(ch_state[chid]);
                }
        }
+        kfree(ch_state);
+done:
        gk20a_idle(g->dev);
 }
author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-03-16 13:00:32 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 22:00:44 -0400
commit	78d8f8fe366d521e1acb62a96ca5f0d72e15c8f5 (patch)
tree	e061e1aa42b10bd0bb3b1293770626106edea765 /drivers/gpu/nvgpu/gk20a
parent	1eded552869f6957bec7695554752e26391daaee (diff)