diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2014-09-30 06:30:13 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:11:50 -0400 |
commit | 5ce4438380eeb7cf4cb5f53e80b2b28dd3ee0fe2 (patch) | |
tree | 3c6bf589333ee1207a5b00a1180e79664c1a83de /drivers/gpu/nvgpu | |
parent | 492b60fa2eb5c51b9539a0071323995bcafafbe6 (diff) |
gpu: nvgpu: find unused cde context instead of lru
When preparing a new job, loop initially through the small number of
preallocated contexts and try to find one that is already finished,
instead of blindly getting the next slot in lru order. If all have work
to do, select next in lru order. This reduces the possibility of a
deadlock between cde tasks.
Bug 200040211
Change-Id: Ib695c0a8e1bcec095d50ec4f2522f3aad39ce97b
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/552035
GVS: Gerrit_Virtual_Submit
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 32 |
1 file changed, 28 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index e6dbaea8..1c230be6 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -589,6 +589,33 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | |||
589 | num_entries, flags, fence, fence_out); | 589 | num_entries, flags, fence, fence_out); |
590 | } | 590 | } |
591 | 591 | ||
592 | static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a_cde_app *cde_app) | ||
593 | { | ||
594 | struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx; | ||
595 | int i; | ||
596 | |||
597 | /* try to find a jobless context */ | ||
598 | |||
599 | for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) { | ||
600 | struct channel_gk20a *ch = cde_ctx->ch; | ||
601 | bool empty; | ||
602 | |||
603 | mutex_lock(&ch->jobs_lock); | ||
604 | empty = list_empty(&ch->jobs); | ||
605 | mutex_unlock(&ch->jobs_lock); | ||
606 | |||
607 | if (empty) | ||
608 | return cde_ctx; | ||
609 | } | ||
610 | |||
611 | /* pick just the next cde context, hopefully somewhat in order */ | ||
612 | cde_ctx = cde_app->cde_ctx + cde_app->cde_ctx_ptr; | ||
613 | cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) % | ||
614 | ARRAY_SIZE(cde_app->cde_ctx); | ||
615 | |||
616 | return cde_ctx; | ||
617 | } | ||
618 | |||
592 | int gk20a_cde_convert(struct gk20a *g, | 619 | int gk20a_cde_convert(struct gk20a *g, |
593 | struct dma_buf *dst, | 620 | struct dma_buf *dst, |
594 | s32 dst_kind, u64 dst_byte_offset, | 621 | s32 dst_kind, u64 dst_byte_offset, |
@@ -610,10 +637,7 @@ int gk20a_cde_convert(struct gk20a *g, | |||
610 | 637 | ||
611 | mutex_lock(&cde_app->mutex); | 638 | mutex_lock(&cde_app->mutex); |
612 | 639 | ||
613 | /* pick next free cde context */ | 640 | cde_ctx = gk20a_cde_get_context(cde_app); |
614 | cde_ctx = cde_app->cde_ctx + cde_app->cde_ctx_ptr; | ||
615 | cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) % | ||
616 | ARRAY_SIZE(cde_app->cde_ctx); | ||
617 | 641 | ||
618 | /* First, map the buffers to local va */ | 642 | /* First, map the buffers to local va */ |
619 | 643 | ||