From 9da40c79fc36ea73cf682a7f4c76c8717bcf4fce Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 7 Nov 2016 15:47:46 -0800
Subject: gpu: nvgpu: Store pending sema waits

Store pending sema waits so that they can be explicitly handled when
the driver dies. If the sema_wait is freed before the pending wait is
either handled or canceled, problems occur.

Internally, sync_fence_wait_async() uses the kernel timers, which keep
a linked list of pending events that the kernel iterates through every
so often. If the list node embedded in the sync_fence_waiter struct is
freed before it can be removed from that pending timers list, the list
becomes corrupted; when the kernel next iterates through it, crashes
and other related problems can happen.

Bug 1816516
Bug 1807277

Change-Id: Iddc4be64583c19bfdd2d88b9098aafc6ae5c6475
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1250025
(cherry picked from commit 01889e21bd31dbd7ee85313e98079138ed1d63be)
Reviewed-on: http://git-master/r/1261920
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 88 +++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h |  2 +
 drivers/gpu/nvgpu/gk20a/gk20a.h              |  4 ++
 drivers/gpu/nvgpu/nvgpu_common.c             |  3 +-
 4 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index ba8fbc98..c3c6fbb8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -16,6 +16,8 @@
  */
 
 #include
+
+#include
 #include
 
 #include "channel_sync_gk20a.h"
@@ -396,10 +398,82 @@ struct gk20a_channel_semaphore {
 #ifdef CONFIG_SYNC
 struct wait_fence_work {
         struct sync_fence_waiter waiter;
+        struct sync_fence *fence;
         struct channel_gk20a *ch;
         struct gk20a_semaphore *sema;
+        struct gk20a *g;
+        struct list_head entry;
 };
 
+/*
+ * Keep track of all the pending waits on semaphores that exist for a GPU. This
+ * has to be done because the waits on fences backed by semaphores are
+ * asynchronous so it's impossible to otherwise know when they will fire. During
+ * driver cleanup this list can be checked and all existing waits can be
+ * canceled.
+ */
+static void gk20a_add_pending_sema_wait(struct gk20a *g,
+                                        struct wait_fence_work *work)
+{
+        raw_spin_lock(&g->pending_sema_waits_lock);
+        list_add(&work->entry, &g->pending_sema_waits);
+        raw_spin_unlock(&g->pending_sema_waits_lock);
+}
+
+/*
+ * Copy the list head from the pending wait list to the passed list and
+ * then delete the entire pending list.
+ */
+static void gk20a_start_sema_wait_cancel(struct gk20a *g,
+                                         struct list_head *list)
+{
+        raw_spin_lock(&g->pending_sema_waits_lock);
+        list_replace_init(&g->pending_sema_waits, list);
+        raw_spin_unlock(&g->pending_sema_waits_lock);
+}
+
+/*
+ * During shutdown this should be called to make sure that any pending sema
+ * waits are canceled. This is a fairly delicate and tricky bit of code. Here's
+ * how it works.
+ *
+ * Every time a semaphore wait is initiated in SW the wait_fence_work struct is
+ * added to the pending_sema_waits list. When the semaphore launcher code runs
+ * it checks the pending_sema_waits list. If this list is non-empty that means
+ * that the wait_fence_work struct must be present and can be removed.
+ *
+ * When the driver shuts down one of the steps is to cancel pending sema waits.
+ * To do this the entire list of pending sema waits is removed (and stored in a
+ * separate local list). So now, if the semaphore launcher code runs it will see
+ * that the pending_sema_waits list is empty and knows that it no longer owns
+ * the wait_fence_work struct.
+ */
+void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
+{
+        struct wait_fence_work *work;
+        struct list_head local_pending_sema_waits;
+
+        gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits);
+
+        while (!list_empty(&local_pending_sema_waits)) {
+                int ret;
+
+                work = list_first_entry(&local_pending_sema_waits,
+                                        struct wait_fence_work,
+                                        entry);
+
+                list_del_init(&work->entry);
+
+                /*
+                 * Only kfree() work if the cancel is successful. Otherwise it's
+                 * in use by the gk20a_channel_semaphore_launcher() code.
+                 */
+                ret = sync_fence_cancel_async(work->fence, &work->waiter);
+                if (ret == 0)
+                        kfree(work);
+        }
+}
+
 static void gk20a_channel_semaphore_launcher(
                 struct sync_fence *fence,
                 struct sync_fence_waiter *waiter)
@@ -407,7 +481,16 @@ static void gk20a_channel_semaphore_launcher(
         int err;
         struct wait_fence_work *w = container_of(waiter,
                         struct wait_fence_work, waiter);
-        struct gk20a *g = w->ch->g;
+        struct gk20a *g = w->g;
+
+        /*
+         * This spinlock must protect a _very_ small critical section -
+         * otherwise it's possible that the deterministic submit path suffers.
+         */
+        raw_spin_lock(&g->pending_sema_waits_lock);
+        if (!list_empty(&g->pending_sema_waits))
+                list_del_init(&w->entry);
+        raw_spin_unlock(&g->pending_sema_waits_lock);
 
         gk20a_dbg_info("waiting for pre fence %p '%s'",
                         fence, fence->name);
@@ -631,6 +714,8 @@ static int gk20a_channel_semaphore_wait_fd(
         }
 
         sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
+        w->fence = sync_fence;
+        w->g = c->g;
         w->ch = c;
         w->sema = gk20a_semaphore_alloc(c);
         if (!w->sema) {
@@ -657,6 +742,7 @@ static int gk20a_channel_semaphore_wait_fd(
                 goto clean_up_sema;
 
         ret = sync_fence_wait_async(sync_fence, &w->waiter);
+        gk20a_add_pending_sema_wait(c->g, w);
 
         /*
          * If the sync_fence has already signaled then the above async_wait
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 5e75dd9b..063a5457 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -25,6 +25,7 @@ struct priv_cmd_entry;
 struct channel_gk20a;
 struct gk20a_semaphore;
 struct gk20a_fence;
+struct gk20a;
 
 struct gk20a_channel_sync {
         atomic_t refcount;
@@ -102,5 +103,6 @@ struct gk20a_channel_sync {
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
 bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
+void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a4cbb4b2..987dd517 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -874,6 +874,10 @@ struct gk20a {
          */
         struct gk20a_semaphore_sea *sema_sea;
 
+        /* List of pending SW semaphore waits. */
+        struct list_head pending_sema_waits;
+        raw_spinlock_t pending_sema_waits_lock;
+
         /* held while manipulating # of debug/profiler sessions present */
         /* also prevents debug sessions from attaching until released */
         struct mutex dbg_sessions_lock;
diff --git a/drivers/gpu/nvgpu/nvgpu_common.c b/drivers/gpu/nvgpu/nvgpu_common.c
index d50f2beb..4f0e883f 100644
--- a/drivers/gpu/nvgpu/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/nvgpu_common.c
@@ -51,6 +51,8 @@ static void nvgpu_init_vars(struct gk20a *g)
 
         g->dev->dma_parms = &g->dma_parms;
         dma_set_max_seg_size(g->dev, UINT_MAX);
+        INIT_LIST_HEAD(&g->pending_sema_waits);
+        raw_spin_lock_init(&g->pending_sema_waits_lock);
 }
 
 static void nvgpu_init_timeout(struct gk20a *g)
@@ -219,4 +221,3 @@ const struct firmware *nvgpu_request_firmware(struct gk20a *g,
 
         return fw;
 }
-
-- 
cgit v1.2.2
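
The subtle part of the patch is the ownership hand-off: gk20a_channel_cancel_pending_sema_waits() splices the whole pending list out under pending_sema_waits_lock and then tries to cancel each wait, and it kfree()s an entry only when sync_fence_cancel_async() succeeds; if the cancel fails, the launcher callback has already taken over and is responsible for the entry. The stand-alone C sketch below mimics that splice-then-cancel pattern in user space. It is illustrative only, not nvgpu code: pending_wait, cancel_wait and cancel_all_pending are invented names, and the real locking, kernel list helpers and async callback are left out.

/* Illustrative sketch only -- not nvgpu code. Builds with any C99 compiler. */
#include <stdlib.h>

struct pending_wait {
        struct pending_wait *next;
        int fired;                      /* stand-in for "callback already ran" */
};

static struct pending_wait *pending_head;

/* Stand-in for sync_fence_cancel_async(): 0 if the wait was cancelled. */
static int cancel_wait(struct pending_wait *w)
{
        return w->fired ? -1 : 0;
}

static void cancel_all_pending(void)
{
        /* Splice the whole list out; the driver does this under a raw spinlock. */
        struct pending_wait *local = pending_head;

        pending_head = NULL;

        while (local) {
                struct pending_wait *w = local;

                local = w->next;
                if (cancel_wait(w) == 0) {
                        /* Cancel succeeded: the shutdown path owns the entry. */
                        free(w);
                } else {
                        /*
                         * Cancel failed: the callback already ran and owns the
                         * entry. The driver leaves it alone; the sketch frees it
                         * here only because there is no real callback to do so.
                         */
                        free(w);
                }
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct pending_wait *w = calloc(1, sizeof(*w));

                if (!w)
                        return 1;
                w->fired = (i == 1);    /* pretend one wait already fired */
                w->next = pending_head;
                pending_head = w;
        }
        cancel_all_pending();
        return 0;
}

In the driver both sides manipulate the global pending_sema_waits list only under pending_sema_waits_lock, so gk20a_channel_semaphore_launcher() can tell from an empty list that shutdown has already claimed the wait_fence_work and will not free it out from under the callback.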