From 9eac0fd84921359ded2acdf920de5592322ad93c Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Fri, 29 Apr 2016 17:03:03 -0700
Subject: gpu: nvgpu: Add debugging to the semaphore code

Add GPU debugging to the semaphore code.

Bug 1732449

JIRA DNVGPU-12

Change-Id: I98466570cf8d234b49a7f85d88c834648ddaaaee
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1198594
(cherry picked from commit 420809cc31fcdddde32b8e59721676c67b45f592)
Reviewed-on: http://git-master/r/1153671
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 18 +++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/gk20a.h              |  2 ++
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c    | 28 +++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h    | 13 +++++++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 9c8911e9..e4972610 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -428,9 +428,12 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 		struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
 		int cmd_size, bool acquire, bool wfi)
 {
-	u32 off = cmd->off;
+	int ch = c->hw_chid;
+	u32 ob, off = cmd->off;
 	u64 va;
 
+	ob = off;
+
 	/*
 	 * RO for acquire (since we just need to read the mem) and RW for
 	 * release since we will need to write back to the semaphore memory.
@@ -480,6 +483,19 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 		/* ignored */
 		gk20a_mem_wr32(g, cmd->mem, off++, 0);
 	}
+
+	if (acquire)
+		gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d "
+				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
+				     ch, gk20a_semaphore_get_value(s),
+				     s->hw_sema->ch->hw_chid, va, cmd->gva,
+				     cmd->mem->gpu_va, ob);
+	else
+		gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx "
+				     "cmd_mem=0x%llx b=0x%llx off=%u",
+				     ch, gk20a_semaphore_get_value(s),
+				     readl(s->hw_sema->value), va, cmd->gva,
+				     cmd->mem->gpu_va, ob);
 }
 
 static int gk20a_channel_semaphore_wait_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 77a84e59..c255f8f8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -953,6 +953,8 @@ enum gk20a_dbg_categories {
 	gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
 	gpu_dbg_sched = BIT(13), /* sched control tracing */
 	gpu_dbg_map_v = BIT(14), /* verbose mem mappings */
+	gpu_dbg_sema = BIT(15), /* semaphore debugging */
+	gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 
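The two new category bits are only half of the story: gk20a_dbg() prints a message only when that message's category bit is also set in the driver's global debug mask, so the semaphore logging stays silent by default. Below is a minimal, self-contained sketch of that gating scheme; the mask variable and macro names are hypothetical stand-ins for illustration, not the driver's own symbols.

#include <linux/bitops.h>
#include <linux/printk.h>

/* Hypothetical stand-in for the driver's global debug mask; here only
 * plain semaphore debugging (BIT(15), gpu_dbg_sema) is enabled. */
static unsigned long example_dbg_mask = BIT(15);

/* Print only when the message's category bit is enabled in the mask. */
#define example_sema_dbg(cat_bit, fmt, args...) \
	do { \
		if (example_dbg_mask & (cat_bit)) \
			pr_info("gpu_sema: " fmt "\n", ##args); \
	} while (0)

With the mask above, a BIT(15) message is printed while a BIT(16) (gpu_dbg_sema_v) message is discarded, which is why the verbose per-command messages added in add_sema_cmd() and in the sema-lock macros can stay compiled in without flooding the kernel log.
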
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 396e7419..0fa31468 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -29,12 +29,15 @@
 
 #define __lock_sema_sea(s) \
 	do { \
+		gpu_sema_verbose_dbg("Acquiring sema lock..."); \
 		mutex_lock(&s->sea_lock); \
+		gpu_sema_verbose_dbg("Sema lock acquired!"); \
 	} while (0)
 
 #define __unlock_sema_sea(s) \
 	do { \
 		mutex_unlock(&s->sea_lock); \
+		gpu_sema_verbose_dbg("Released sema lock"); \
 	} while (0)
 
 /*
@@ -89,11 +92,13 @@ struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
 	if (__gk20a_semaphore_sea_grow(g->sema_sea))
 		goto cleanup;
 
+	gpu_sema_dbg("Created semaphore sea!");
 	return g->sema_sea;
 
 cleanup:
 	kfree(g->sema_sea);
 	g->sema_sea = NULL;
+	gpu_sema_dbg("Failed to create semaphore sea!");
 	return NULL;
 }
 
@@ -144,11 +149,14 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
 	list_add(&p->pool_list_entry, &sea->pool_list);
 	__unlock_sema_sea(sea);
 
+	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+
 	return p;
 
 fail:
 	__unlock_sema_sea(sea);
 	kfree(p);
+	gpu_sema_dbg("Failed to allocate semaphore pool!");
 	return ERR_PTR(err);
 }
 
@@ -162,9 +170,13 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 	int ents, err = 0;
 	u64 addr;
 
+	gpu_sema_dbg("Mapping semaphore pool! (idx=%d)", p->page_idx);
+
 	p->cpu_va = vmap(&p->page, 1, 0,
 			 pgprot_writecombine(PAGE_KERNEL));
 
+	gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
+
 	/* First do the RW mapping. */
 	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
 	if (!p->rw_sg_table)
@@ -185,6 +197,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 		goto fail_free_sgt;
 	}
 
+	gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx,
+		     &sg_dma_address(p->rw_sg_table->sgl));
+
 	/* Map into the GPU... Doesn't need to be fixed. */
 	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, 0,
 				   gk20a_mem_flag_none, false,
@@ -194,6 +209,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 		goto fail_unmap_sgt;
 	}
 
+	gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx,
+		     p->gpu_va);
+
 	/*
 	 * And now the global mapping. Take the sea lock so that we don't race
 	 * with a concurrent remap.
@@ -215,6 +233,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 	p->gpu_va_ro = addr;
 	p->mapped = 1;
 
+	gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx,
+		     p->gpu_va_ro);
+
 	__unlock_sema_sea(p->sema_sea);
 
 	return 0;
@@ -229,6 +250,7 @@ fail_free_sgt:
 fail:
 	kfree(p->rw_sg_table);
 	p->rw_sg_table = NULL;
+	gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx);
 	return err;
 }
 
@@ -260,13 +282,14 @@ void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
 	kfree(p->rw_sg_table);
 	p->rw_sg_table = NULL;
 
-	gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
 	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
 		/*
 		 * Make sure the mem addresses are all NULL so if this gets
 		 * reused we will fault.
 		 */
 		hw_sema->value = NULL;
+
+	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 
 /*
@@ -291,6 +314,7 @@ static void gk20a_semaphore_pool_free(struct kref *ref)
 	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
 		kfree(hw_sema);
 
+	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
 	kfree(p);
 }
 
@@ -415,6 +439,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
 	 */
 	gk20a_semaphore_pool_get(s->hw_sema->p);
 
+	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+
 	return s;
 }
 
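The messages added to gk20a_semaphore_pool_map() track one page through its three mappings: a write-combined CPU mapping from vmap(), a DMA address taken from the scatter-gather table, and the GPU read-write/read-only virtual addresses. A rough sketch of the first two steps follows, using a hypothetical helper (not driver code) to show the pattern being logged:

#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>

/* Hypothetical helper: give one pre-allocated page a CPU mapping and a
 * one-entry sg_table, mirroring the start of gk20a_semaphore_pool_map(). */
static void *example_pool_cpu_map(struct page *page, struct sg_table *sgt)
{
	/* Write-combined: CPU reads of semaphore values stay uncached. */
	void *cpu_va = vmap(&page, 1, 0, pgprot_writecombine(PAGE_KERNEL));

	if (!cpu_va)
		return NULL;

	/* A single page needs exactly one scatterlist entry. */
	if (sg_alloc_table(sgt, 1, GFP_KERNEL)) {
		vunmap(cpu_va);
		return NULL;
	}
	sg_set_page(sgt->sgl, page, PAGE_SIZE, 0);

	return cpu_va;
}

Note the "%pad" specifier in the new DMA-address message: it is the kernel's printk extension for printing a dma_addr_t through a pointer, which is why the argument is &sg_dma_address(...) rather than the value itself.
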
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index 29a01ab8..d96037ce 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -21,6 +21,12 @@
 #include "gk20a.h"
 #include "mm_gk20a.h"
 #include "channel_gk20a.h"
+#include "gk20a_allocator.h"
+
+#define gpu_sema_dbg(fmt, args...) \
+	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(fmt, args...) \
+	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
 
 /*
  * Max number of channels that can be used is 512. This of course needs to be
@@ -274,6 +280,9 @@ static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
 		return;
 
 	writel(val, s->hw_sema->value);
+
+	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
+			     s->hw_sema->ch->hw_chid, val);
 }
 
 /*
@@ -291,5 +300,9 @@ static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s)
 
 	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
 	s->incremented = 1;
+
+	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
+			     s->hw_sema->ch->hw_chid,
+			     gk20a_semaphore_next_value(s));
 }
 #endif
-- 
cgit v1.2.2
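Taken together, the new messages let the three legs of the semaphore protocol be correlated in the log: gk20a_semaphore_incr() reserves the next payload ("INCR sema for c=N (v)"), the release path publishes it ("(c=N) WRITE v"), and add_sema_cmd() records the threshold a waiter needs ("ACQ_GE t"). The user-space model below is an illustration only, under the assumption that an ACQ_GE waiter succeeds once the published value is greater than or equal to its threshold; it is not driver code.

#include <stdatomic.h>
#include <stdio.h>

/* Model of one hw semaphore slot: a published value plus the next
 * value a release will write. */
struct sema_model {
	atomic_uint value; /* what "(c=N) WRITE v" publishes */
	atomic_uint next;  /* what "INCR sema for c=N (v)" reserves */
};

/* INCR: reserve the payload this job's release will eventually write. */
static unsigned int model_incr(struct sema_model *s)
{
	return atomic_fetch_add(&s->next, 1) + 1;
}

/* WRITE: publish the reserved payload. */
static void model_release(struct sema_model *s, unsigned int val)
{
	atomic_store(&s->value, val);
}

/* ACQ_GE: satisfied once the published value reaches the threshold;
 * the GPU method retries, this model just tests once. */
static int model_acquire_ge(struct sema_model *s, unsigned int threshold)
{
	return atomic_load(&s->value) >= threshold;
}

int main(void)
{
	struct sema_model s = { 0, 0 };
	unsigned int payload = model_incr(&s); /* reserves 1 */

	printf("acquire before release: %d\n", model_acquire_ge(&s, payload));
	model_release(&s, payload);
	printf("acquire after release:  %d\n", model_acquire_ge(&s, payload));
	return 0;
}

Because the published value only ever grows, a waiter that sees a stale value can simply retry; nothing is consumed on acquire, which is presumably what lets a single 32-bit location per channel back any number of outstanding waits.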