From 2d40ebb1caa313d5d12a13f15b2623faa3ad914b Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 4 Nov 2015 14:06:37 +0530
Subject: gpu: nvgpu: rework private command buffer free path

We currently allocate private command buffers (wait_cmd
and incr_cmd) before submitting the job but we never
free them explicitly.
When the private command queue of the channel is full, we
then try to recycle/remove free command buffers.
But this recycling happens in the submit path, and
hence that particular submit takes much longer.

Rework this as below:
- add reference of command buffers to job structure
- when job completes, free the command buffers
  explicitly
- remove the code to recycle buffers since it should
  not be needed now

Note that command buffers need to be freed in order of
their allocation. Check this, and print an error if the
order is violated, before freeing the command buffer entry.

Bug 200141116
Bug 1698667

Change-Id: Id4b69429d7ad966307e0d122a71ad55076684307
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/827638
(cherry picked from commit c6cefd69b71c9b70d6df5343b13dfcfb3fa99598)
Reviewed-on: http://git-master/r/835802
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 121 +++++++++-----------------------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h |   2 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |   2 -
 3 files changed, 34 insertions(+), 91 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9bacb5c9..6015ab5e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -47,7 +47,6 @@ static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
 
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e);
-static void recycle_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
@@ -1179,9 +1178,6 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 
 	q->size = q->mem.size / sizeof (u32);
 
-	INIT_LIST_HEAD(&q->head);
-	INIT_LIST_HEAD(&q->free);
-
 	return 0;
 
 clean_up:
@@ -1193,28 +1189,12 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
 	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e;
-	struct list_head *pos, *tmp, *head;
 
 	if (q->size == 0)
 		return;
 
 	gk20a_gmmu_unmap_free(ch_vm, &q->mem);
 
-	/* free used list */
-	head = &q->head;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		free_priv_cmdbuf(c, e);
-	}
-
-	/* free free list */
-	head = &q->free;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		kfree(e);
-	}
-
 	memset(q, 0, sizeof(struct priv_cmd_queue));
 }
 
@@ -1226,7 +1206,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	struct priv_cmd_entry *e;
 	u32 free_count;
 	u32 size = orig_size;
-	bool no_retry = false;
 
 	gk20a_dbg_fn("size %d", orig_size);
 
@@ -1240,17 +1219,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
 			c->hw_chid, q->get, q->put);
 
-TRY_AGAIN:
 	free_count = (q->size - (q->put - q->get) - 1) % q->size;
 
-	if (size > free_count) {
-		if (!no_retry) {
-			recycle_priv_cmdbuf(c);
-			no_retry = true;
-			goto TRY_AGAIN;
-		} else
-			return -EAGAIN;
-	}
+	if (size > free_count)
+		return -EAGAIN;
 
 	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 	if (!e) {
@@ -1280,9 +1252,6 @@ TRY_AGAIN:
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	/* add new entry to head since we free from head */
-	list_add(&e->list, &q->head);
-
 	*entry = e;
 
 	gk20a_dbg_fn("done");
@@ -1295,65 +1264,9 @@ TRY_AGAIN:
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e)
 {
-	if (!e)
-		return;
-
-	list_del(&e->list);
-
 	kfree(e);
 }
 
-/* free entries if they're no longer being used */
-static void recycle_priv_cmdbuf(struct channel_gk20a *c)
-{
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e, *tmp;
-	struct list_head *head = &q->head;
-	bool wrap_around, found = false;
-
-	gk20a_dbg_fn("");
-
-	/* Find the most recent free entry. Free it and everything before it */
-	list_for_each_entry(e, head, list) {
-
-		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
-			"curr get:put:wrap %d:%d:%d",
-			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
-			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
-
-		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
-		if (e->gp_get < e->gp_put) {
-			if (c->gpfifo.get >= e->gp_put ||
-			    wrap_around) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		} else if (e->gp_get > e->gp_put) {
-			if (wrap_around &&
-			    c->gpfifo.get >= e->gp_put) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		}
-	}
-
-	if (found)
-		q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
-	else {
-		gk20a_dbg_info("no free entry recycled");
-		return;
-	}
-
-	list_for_each_entry_safe_continue(e, tmp, head, list) {
-		free_priv_cmdbuf(c, e);
-	}
-
-	gk20a_dbg_fn("done");
-}
-
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_alloc_gpfifo_args *args)
 {
@@ -1724,9 +1637,31 @@ fail_unlock:
 	gk20a_channel_put(ch);
 }
 
+static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
+					struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	u32 cmd_entry_start;
+	struct device *d = dev_from_gk20a(c->g);
+
+	if (!e)
+		return 0;
+
+	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
+	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
+		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
+
+	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
+	free_priv_cmdbuf(c, e);
+
+	return 0;
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				 struct gk20a_fence *pre_fence,
 				 struct gk20a_fence *post_fence,
+				 struct priv_cmd_entry *wait_cmd,
+				 struct priv_cmd_entry *incr_cmd,
 				 bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
@@ -1761,6 +1696,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		job->mapped_buffers = mapped_buffers;
 		job->pre_fence = gk20a_fence_get(pre_fence);
 		job->post_fence = gk20a_fence_get(post_fence);
+		job->wait_cmd = wait_cmd;
+		job->incr_cmd = incr_cmd;
 
 		gk20a_channel_timeout_start(c, job);
 
@@ -1808,6 +1745,11 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 		gk20a_fence_put(job->pre_fence);
 		gk20a_fence_put(job->post_fence);
 
+		/* Free the private command buffers (wait_cmd first and
+		 * then incr_cmd i.e. order of allocation) */
+		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
+		gk20a_free_priv_cmdbuf(c, job->incr_cmd);
+
 		/* job is done. release its vm reference (taken in add_job) */
 		gk20a_vm_put(vm);
 		/* another bookkeeping taken in add_job. caller must hold a ref
@@ -2114,6 +2056,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	/* TODO! Check for errors... */
 	gk20a_channel_add_job(c, pre_fence, post_fence,
+				wait_cmd, incr_cmd,
 				skip_buffer_refcounting);
 
 	c->cmds_pending = true;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d5f5e6a2..245db56a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -62,6 +62,8 @@ struct channel_gk20a_job {
 	int num_mapped_buffers;
 	struct gk20a_fence *pre_fence;
 	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
 	struct list_head list;
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2dd4ccf5..ac55e988 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -160,8 +160,6 @@ struct priv_cmd_queue {
 	u32 size;	/* num of entries in words */
 	u32 put;	/* put for priv cmd queue */
 	u32 get;	/* get for priv cmd queue */
-	struct list_head free;	/* list of pre-allocated free entries */
-	struct list_head head;	/* list of used entries */
 };
 
 struct priv_cmd_entry {
-- 
cgit v1.2.2