gpu: nvgpu: Add debugging to the semaphore code

Add GPU debugging to the semaphore code.

Bug 1732449
JIRA DNVGPU-12

Change-Id: I98466570cf8d234b49a7f85d88c834648ddaaaee
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1198594
(cherry picked from commit 420809cc31fcdddde32b8e59721676c67b45f592)
Reviewed-on: http://git-master/r/1153671
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2016-04-29 20:03:03 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-08-30 13:04:30 -0400
commit: 9eac0fd84921359ded2acdf920de5592322ad93c (patch)
tree: 5d250d475a2dc4629df39cee285a4429660c87f1 /drivers/gpu/nvgpu
parent: 0e69c6707b974726459759464bc7876afe894740 (diff)
4 files changed, 59 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 9c8911e9..e4972610 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -428,9 +428,12 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
                         struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
                         int cmd_size, bool acquire, bool wfi)
 {
-        u32 off = cmd->off;
+        int ch = c->hw_chid;
+        u32 ob, off = cmd->off;
        u64 va;
+        ob = off;
        /*
         * RO for acquire (since we just need to read the mem) and RW for
         * release since we will need to write back to the semaphore memory.
@@ -480,6 +483,19 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
                /* ignored */
                gk20a_mem_wr32(g, cmd->mem, off++, 0);
        }
+        if (acquire)
+                gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d"
+                                     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
+                                     ch, gk20a_semaphore_get_value(s),
+                                     s->hw_sema->ch->hw_chid, va, cmd->gva,
+                                     cmd->mem->gpu_va, ob);
+        else
+                gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx "
+                                     "cmd_mem=0x%llx b=0x%llx off=%u",
+                                     ch, gk20a_semaphore_get_value(s),
+                                     readl(s->hw_sema->value), va, cmd->gva,
+                                     cmd->mem->gpu_va, ob);
 }
 static int gk20a_channel_semaphore_wait_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 77a84e59..c255f8f8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -953,6 +953,8 @@ enum gk20a_dbg_categories {
        gpu_dbg_ctxsw   = BIT(12), /* ctxsw tracing */
        gpu_dbg_sched   = BIT(13), /* sched control tracing */
        gpu_dbg_map_v   = BIT(14), /* verbose mem mappings */
+        gpu_dbg_sema    = BIT(15), /* semaphore debugging */
+        gpu_dbg_sema_v  = BIT(16), /* verbose semaphore debugging */
        gpu_dbg_mem     = BIT(31), /* memory accesses, very verbose */
 };
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 396e7419..0fa31468 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -29,12 +29,15 @@
 #define __lock_sema_sea(s)                                              \
        do {                                                            \
+                gpu_sema_verbose_dbg("Acquiring sema lock...");         \
                mutex_lock(&s->sea_lock);                               \
+                gpu_sema_verbose_dbg("Sema lock aquried!");             \
        } while (0)
 #define __unlock_sema_sea(s)                                            \
        do {                                                            \
                mutex_unlock(&s->sea_lock);                             \
+                gpu_sema_verbose_dbg("Released sema lock");             \
        } while (0)
 /*
@@ -89,11 +92,13 @@ struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
        if (__gk20a_semaphore_sea_grow(g->sema_sea))
                goto cleanup;
+        gpu_sema_dbg("Created semaphore sea!");
        return g->sema_sea;
 cleanup:
        kfree(g->sema_sea);
        g->sema_sea = NULL;
+        gpu_sema_dbg("Failed to creat semaphore sea!");
        return NULL;
 }
@@ -144,11 +149,14 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
        list_add(&p->pool_list_entry, &sea->pool_list);
        __unlock_sema_sea(sea);
+        gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
        return p;
 fail:
        __unlock_sema_sea(sea);
        kfree(p);
+        gpu_sema_dbg("Failed to allocate semaphore pool!");
        return ERR_PTR(err);
 }
@@ -162,9 +170,13 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
        int ents, err = 0;
        u64 addr;
+        gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
        p->cpu_va = vmap(&p->page, 1, 0,
                         pgprot_writecombine(PAGE_KERNEL));
+        gpu_sema_dbg("  %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
        /* First do the RW mapping. */
        p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
        if (!p->rw_sg_table)
@@ -185,6 +197,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
                goto fail_free_sgt;
        }
+        gpu_sema_dbg("  %d: DMA addr = 0x%pad", p->page_idx,
+                     &sg_dma_address(p->rw_sg_table->sgl));
        /* Map into the GPU... Doesn't need to be fixed. */
        p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
                                   0, gk20a_mem_flag_none, false,
@@ -194,6 +209,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
                goto fail_unmap_sgt;
        }
+        gpu_sema_dbg("  %d: GPU read-write VA = 0x%llx", p->page_idx,
+                     p->gpu_va);
        /*
         * And now the global mapping. Take the sea lock so that we don't race
         * with a concurrent remap.
@@ -215,6 +233,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
        p->gpu_va_ro = addr;
        p->mapped = 1;
+        gpu_sema_dbg("  %d: GPU read-only  VA = 0x%llx", p->page_idx,
+                     p->gpu_va_ro);
        __unlock_sema_sea(p->sema_sea);
        return 0;
@@ -229,6 +250,7 @@ fail_free_sgt:
 fail:
        kfree(p->rw_sg_table);
        p->rw_sg_table = NULL;
+        gpu_sema_dbg("  %d: Failed to map semaphore pool!", p->page_idx);
        return err;
 }
@@ -260,13 +282,14 @@ void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
        kfree(p->rw_sg_table);
        p->rw_sg_table = NULL;
-        gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
        list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
                /*
                 * Make sure the mem addresses are all NULL so if this gets
                 * reused we will fault.
                 */
                hw_sema->value = NULL;
+        gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 /*
@@ -291,6 +314,7 @@ static void gk20a_semaphore_pool_free(struct kref *ref)
        list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
                kfree(hw_sema);
+        gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
        kfree(p);
 }
@@ -415,6 +439,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
         */
        gk20a_semaphore_pool_get(s->hw_sema->p);
+        gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
        return s;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 29a01ab8..d96037ce 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -21,6 +21,12 @@
 #include "gk20a.h"
 #include "mm_gk20a.h"
 #include "channel_gk20a.h"
+#include "gk20a_allocator.h"
+#define gpu_sema_dbg(fmt, args...)              \
+        gk20a_dbg(gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(fmt, args...)      \
+        gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
 /*
 * Max number of channels that can be used is 512. This of course needs to be
@@ -274,6 +280,9 @@ static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
                return;
        writel(val, s->hw_sema->value);
+        gpu_sema_verbose_dbg("(c=%d) WRITE %u",
+                             s->hw_sema->ch->hw_chid, val);
 }
 /*
@@ -291,5 +300,9 @@ static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
        atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
        s->incremented = 1;
+        gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
+                             s->hw_sema->ch->hw_chid,
+                             gk20a_semaphore_next_value(s));
 }
 #endif
author	Alex Waterman <alexw@nvidia.com>	2016-04-29 20:03:03 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-08-30 13:04:30 -0400
commit	9eac0fd84921359ded2acdf920de5592322ad93c (patch)
tree	5d250d475a2dc4629df39cee285a4429660c87f1 /drivers/gpu/nvgpu
parent	0e69c6707b974726459759464bc7876afe894740 (diff)