From c6b9177cfff8a41c3c3c78f5c47c7df677ced58c Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 6 Nov 2017 05:44:23 -0800
Subject: gpu: nvgpu: define error_notifiers in common code

All the Linux-specific error_notifier codes are defined in the Linux-specific
header file <uapi/linux/nvgpu.h> and are used throughout the common driver code

But since they are defined in a Linux-specific file, all the uses of those
error_notifiers need to be moved into Linux-specific code only

Hence define new error_notifiers in include/nvgpu/error_notifier.h and
use them in the common code

Add new API nvgpu_error_notifier_to_channel_notifier() to convert common
error_notifier of the form NVGPU_ERR_NOTIFIER_* to linux specific error
notifier of the form NVGPU_CHANNEL_*

Any future addition to the error notifiers requires updating both forms of
error notifiers

Move all error notifier related metadata from channel_gk20a (common code)
to the Linux-specific structure nvgpu_channel_linux
Update all accesses to this data to use the new structure instead of
channel_gk20a

Move and rename the APIs below into a Linux-specific file and declare them
in error_notifier.h
nvgpu_set_error_notifier_locked()
nvgpu_set_error_notifier()
nvgpu_is_error_notifier_set()

Add the new API below and use it in fifo_vgpu.c
nvgpu_set_error_notifier_if_empty()

Include <nvgpu/error_notifier.h> wherever new error_notifier codes are used

NVGPU-426

Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/channel.c         | 131 ++++++++++++++++++++++-
 drivers/gpu/nvgpu/common/linux/channel.h         |  10 ++
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c   |  35 +++---
 drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c  |  34 +++---
 drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c    |  25 ++---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c          |  65 +++--------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h          |   7 --
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c             |  46 ++++----
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c               |  21 ++--
 drivers/gpu/nvgpu/include/nvgpu/error_notifier.h |  49 +++++++++
 10 files changed, 285 insertions(+), 138 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/error_notifier.h

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 8366ed88..1ae2d444 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -17,6 +17,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/error_notifier.h>
 
 /*
  * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -37,6 +38,124 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
+/*
+ * Convert a common-code error notifier of the form NVGPU_ERR_NOTIFIER_*
+ * into the Linux-specific error notifier of the form NVGPU_CHANNEL_* that
+ * is exposed to user space. Unrecognized values are returned unchanged.
+ */
+static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
+{
+	switch (error_notifier) {
+	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
+		return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
+	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
+		return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
+	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
+		return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
+	case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
+		return NVGPU_CHANNEL_GR_EXCEPTION;
+	case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
+		return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+	case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
+		return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
+	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
+		return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
+	case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
+		return NVGPU_CHANNEL_PBDMA_ERROR;
+	case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
+		return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
+	case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
+		return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
+	case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
+		return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+	}
+
+	pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
+
+	return error_notifier;
+}
+
+/**
+ * nvgpu_set_error_notifier_locked()
+ * Should be called with the error_notifier.mutex of ch->os_priv held
+ *
+ * error should be of the form NVGPU_ERR_NOTIFIER_*
+ */
+void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	error = nvgpu_error_notifier_to_channel_notifier(error);
+
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+		struct timespec time_data;
+		u64 nsec;
+
+		getnstimeofday(&time_data);
+		nsec = ((u64)time_data.tv_sec) * 1000000000u +
+				(u64)time_data.tv_nsec;
+		notification->time_stamp.nanoseconds[0] =
+				(u32)nsec;
+		notification->time_stamp.nanoseconds[1] =
+				(u32)(nsec >> 32);
+		notification->info32 = error;
+		notification->status = 0xffff;
+
+		nvgpu_err(ch->g,
+		    "error notifier set to %d for ch %d", error, ch->chid);
+	}
+}
+
+/* error should be of the form  NVGPU_ERR_NOTIFIER_* */
+void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	nvgpu_set_error_notifier_locked(ch, error);
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+
+		/* Don't overwrite error flag if it is already set */
+		if (notification->status != 0xffff)
+			nvgpu_set_error_notifier_locked(ch, error);
+	}
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+/* error_notifier should be of the form  NVGPU_ERR_NOTIFIER_* */
+bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+	bool notifier_set = false;
+
+	error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+		u32 err = notification->info32;
+
+		if (err == error_notifier)
+			notifier_set = true;
+	}
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+
+	return notifier_set;
+}
+
 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
 {
 	struct nvgpu_channel_completion_cb *completion_cb =
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
 static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct nvgpu_channel_linux *priv;
+	int err;
 
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv)
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 	ch->os_priv = priv;
 	priv->ch = ch;
 
+	err = nvgpu_mutex_init(&priv->error_notifier.mutex);
+	if (err) {
+		nvgpu_kfree(g, priv);
+		return err;
+	}
+
 	nvgpu_channel_work_completion_init(ch);
 
 	return 0;
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 
 static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
-	nvgpu_kfree(g, ch->os_priv);
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_destroy(&priv->error_notifier.mutex);
+	nvgpu_kfree(g, priv);
 }
 
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h
index ba1935f3..a4df75d6 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ b/drivers/gpu/nvgpu/common/linux/channel.h
@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
 	struct work_struct work;
 };
 
+struct nvgpu_error_notifier {
+	struct dma_buf *dmabuf;
+	void *vaddr;
+
+	struct nvgpu_notification *notification;
+
+	struct nvgpu_mutex mutex;
+};
+
 struct nvgpu_channel_linux {
 	struct channel_gk20a *ch;
 
 	struct nvgpu_channel_completion_cb completion_cb;
+	struct nvgpu_error_notifier error_notifier;
 };
 
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 0ac50140..67bec31b 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -30,6 +30,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 
 static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
 {
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	if (ch->error_notifier_ref) {
-		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
-		dma_buf_put(ch->error_notifier_ref);
-		ch->error_notifier_ref = NULL;
-		ch->error_notifier = NULL;
-		ch->error_notifier_va = NULL;
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
+		dma_buf_put(priv->error_notifier.dmabuf);
+		priv->error_notifier.dmabuf = NULL;
+		priv->error_notifier.notification = NULL;
+		priv->error_notifier.vaddr = NULL;
 	}
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
 }
 
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 	struct dma_buf *dmabuf;
 	void *va;
 	u64 end = args->offset + sizeof(struct nvgpu_notification);
+	struct nvgpu_channel_linux *priv = ch->os_priv;
 
 	if (!args->mem) {
 		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		return -ENOMEM;
 	}
 
-	ch->error_notifier = va + args->offset;
-	ch->error_notifier_va = va;
-	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
+	priv->error_notifier.notification = va + args->offset;
+	priv->error_notifier.vaddr = va;
+	memset(priv->error_notifier.notification, 0,
+		sizeof(struct nvgpu_notification));
 
 	/* set channel notifiers pointer */
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	ch->error_notifier_ref = dmabuf;
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	priv->error_notifier.dmabuf = dmabuf;
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
 
 	return 0;
 }
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
 			break;
 		}
 		err = ch->g->ops.fifo.force_reset_ch(ch,
-				NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
+				NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
 		gk20a_idle(ch->g);
 		break;
 	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
index cdcecca5..7a2a02e9 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
@@ -25,10 +25,13 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/error_notifier.h>
 
 #include "vgpu.h"
 #include "fifo_vgpu.h"
 
+#include "common/linux/channel.h"
+
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 
@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 
 		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch_tsg)) {
-				gk20a_set_error_notifier(ch_tsg, err_code);
+				nvgpu_set_error_notifier(ch_tsg, err_code);
 				ch_tsg->has_timedout = true;
 				gk20a_channel_put(ch_tsg);
 			}
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 
 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 	} else {
-		gk20a_set_error_notifier(ch, err_code);
+		nvgpu_set_error_notifier(ch, err_code);
 		ch->has_timedout = true;
 	}
 
@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	if (ch->error_notifier_ref) {
-		if (ch->error_notifier->status == 0xffff) {
-			/* If error code is already set, this mmu fault
-			 * was triggered as part of recovery from other
-			 * error condition.
-			 * Don't overwrite error flag. */
-		} else {
-			gk20a_set_error_notifier_locked(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
-		}
-	}
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	/*
+	 * If error code is already set, this mmu fault
+	 * was triggered as part of recovery from other
+	 * error condition.
+	 * Don't overwrite error flag.
+	 */
+	nvgpu_set_error_notifier_if_empty(ch,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 
 	/* mark channel as faulted */
 	ch->has_timedout = true;
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
 
 	switch (info->type) {
 	case TEGRA_VGPU_FIFO_INTR_PBDMA:
-		gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
+		nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
 		break;
 	case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 		break;
 	case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
 		vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
index dd2ae306..33551d17 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,6 +20,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/error_notifier.h>
 
 #include "vgpu.h"
 #include "gr_vgpu.h"
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
 		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
 		break;
 	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
 		break;
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
 		break;
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
 		break;
 	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_EXCEPTION:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
 		gk20a_dbg_gpu_post_events(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index e01d6cdb..a0415861 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -44,6 +44,7 @@
 #include <nvgpu/ltc.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
@@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
 	return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
 }
 
-/**
- * gk20a_set_error_notifier_locked()
- * Should be called with ch->error_notifier_mutex held
- */
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
-{
-	if (ch->error_notifier_ref) {
-		struct timespec time_data;
-		u64 nsec;
-		getnstimeofday(&time_data);
-		nsec = ((u64)time_data.tv_sec) * 1000000000u +
-				(u64)time_data.tv_nsec;
-		ch->error_notifier->time_stamp.nanoseconds[0] =
-				(u32)nsec;
-		ch->error_notifier->time_stamp.nanoseconds[1] =
-				(u32)(nsec >> 32);
-		ch->error_notifier->info32 = error;
-		ch->error_notifier->status = 0xffff;
-
-		nvgpu_err(ch->g,
-		    "error notifier set to %d for ch %d", error, ch->chid);
-	}
-}
-
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
-{
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	gk20a_set_error_notifier_locked(ch, error);
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
-}
-
 static void gk20a_wait_until_counter_is_N(
 	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
 	struct nvgpu_cond *c, const char *caller, const char *counter_name)
@@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 	gk20a_gr_debug_dump(g);
 
 	g->ops.fifo.force_reset_ch(ch,
-		NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true);
 }
 
 /**
@@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	err = nvgpu_mutex_init(&c->ioctl_lock);
 	if (err)
 		return err;
-	err = nvgpu_mutex_init(&c->error_notifier_mutex);
-	if (err)
-		goto fail_1;
 	err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	if (err)
-		goto fail_2;
+		goto fail_1;
 	err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	if (err)
-		goto fail_3;
+		goto fail_2;
 	err = nvgpu_mutex_init(&c->sync_lock);
 	if (err)
-		goto fail_4;
+		goto fail_3;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
 	if (err)
-		goto fail_5;
+		goto fail_4;
 	err = nvgpu_mutex_init(&c->cs_client_mutex);
 	if (err)
-		goto fail_6;
+		goto fail_5;
 #endif
 	err = nvgpu_mutex_init(&c->event_id_list_lock);
 	if (err)
-		goto fail_7;
+		goto fail_6;
 	err = nvgpu_mutex_init(&c->dbg_s_lock);
 	if (err)
-		goto fail_8;
+		goto fail_7;
 
 	nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);
 
 	return 0;
 
-fail_8:
-	nvgpu_mutex_destroy(&c->event_id_list_lock);
 fail_7:
+	nvgpu_mutex_destroy(&c->event_id_list_lock);
+fail_6:
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_destroy(&c->cs_client_mutex);
-fail_6:
-	nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
 fail_5:
+	nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
+fail_4:
 #endif
 	nvgpu_mutex_destroy(&c->sync_lock);
-fail_4:
-	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_3:
-	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
+	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_2:
-	nvgpu_mutex_destroy(&c->error_notifier_mutex);
+	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
 fail_1:
 	nvgpu_mutex_destroy(&c->ioctl_lock);
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8c9095b2..ff96d0d7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -273,11 +273,6 @@ struct channel_gk20a {
 	bool timeout_debug_dump;
 	unsigned int timeslice_us;
 
-	struct dma_buf *error_notifier_ref;
-	struct nvgpu_notification *error_notifier;
-	void *error_notifier_va;
-	struct nvgpu_mutex error_notifier_mutex;
-
 	struct nvgpu_mutex sync_lock;
 	struct gk20a_channel_sync *sync;
 
@@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
 void gk20a_disable_channel(struct channel_gk20a *ch);
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error);
 void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
 			     struct priv_cmd_entry *entry);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 648a8c86..38aecc93 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -39,6 +39,7 @@
 #include <nvgpu/nvhost.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "mm_gk20a.h"
@@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 		nvgpu_mutex_destroy(&tsg->event_id_list_lock);
 
 		nvgpu_mutex_destroy(&c->ioctl_lock);
-		nvgpu_mutex_destroy(&c->error_notifier_mutex);
 		nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
 		nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 		nvgpu_mutex_destroy(&c->sync_lock);
@@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
 	if (!refch)
 		return verbose;
 
-	nvgpu_mutex_acquire(&refch->error_notifier_mutex);
-	if (refch->error_notifier_ref) {
-		u32 err = refch->error_notifier->info32;
+	if (nvgpu_is_error_notifier_set(refch,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT))
+		verbose = refch->timeout_debug_dump;
 
-		if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
-			verbose = refch->timeout_debug_dump;
-	}
-	nvgpu_mutex_release(&refch->error_notifier_mutex);
 	return verbose;
 }
 
@@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 {
 	nvgpu_err(g,
 		"channel %d generated a mmu fault", refch->chid);
-	gk20a_set_error_notifier(refch,
-				NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
+	nvgpu_set_error_notifier(refch,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }
 
 void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
 
 		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch_tsg)) {
-				gk20a_set_error_notifier(ch_tsg, err_code);
+				nvgpu_set_error_notifier(ch_tsg, err_code);
 				gk20a_channel_put(ch_tsg);
 			}
 		}
@@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 		gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
 	} else {
-		gk20a_set_error_notifier(ch, err_code);
+		nvgpu_set_error_notifier(ch, err_code);
 		gk20a_fifo_recover_ch(g, ch->chid, verbose);
 	}
 
@@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
 		*verbose = ch->timeout_debug_dump;
 		*ms = ch->timeout_accumulated_ms;
 		if (recover)
-			gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 
 		gk20a_channel_put(ch);
 	}
@@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
 		gk20a_channel_put(ch);
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch)) {
-				gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+				nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 				*verbose |= ch->timeout_debug_dump;
 				gk20a_channel_put(ch);
 			}
@@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 			rc_type = RC_TYPE_PBDMA_FAULT;
 			nvgpu_err(g,
 				"semaphore acquire timeout!");
-			*error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+			*error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
 		}
 		*handled |= pbdma_intr_0_acquire_pending_f();
 	}
@@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 
 	if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
 		*error_notifier =
-			NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+			NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
 		rc_type = RC_TYPE_PBDMA_FAULT;
 	}
 
@@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
 		struct channel_gk20a *ch = &f->channel[id];
 
 		if (gk20a_channel_get(ch)) {
-			gk20a_set_error_notifier(ch, error_notifier);
+			nvgpu_set_error_notifier(ch, error_notifier);
 			gk20a_fifo_recover_ch(g, id, true);
 			gk20a_channel_put(ch);
 		}
@@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
 		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch)) {
-				gk20a_set_error_notifier(ch,
+				nvgpu_set_error_notifier(ch,
 					error_notifier);
 				gk20a_channel_put(ch);
 			}
@@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
 	u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
 
 	u32 handled = 0;
-	u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR;
+	u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
 	unsigned int rc_type = RC_TYPE_NO_RC;
 
 	if (pbdma_intr_0) {
@@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (!gk20a_channel_get(ch))
 				continue;
-			gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 			gk20a_channel_put(ch);
 		}
 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
@@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
 			"preempt channel %d timeout", id);
 
 		if (gk20a_channel_get(ch)) {
-			gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 			gk20a_fifo_recover_ch(g, id, true);
 			gk20a_channel_put(ch);
 		}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 270d36d6..b3969b60 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -40,6 +40,7 @@
 #include <nvgpu/barrier.h>
 #include <nvgpu/mm.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "gr_ctx_gk20a.h"
@@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
 			nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 			list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 				if (gk20a_channel_get(ch_tsg)) {
-					gk20a_set_error_notifier(ch_tsg,
+					nvgpu_set_error_notifier(ch_tsg,
 							 error_notifier);
 					gk20a_channel_put(ch_tsg);
 				}
 			}
 			nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 		} else {
-			gk20a_set_error_notifier(ch, error_notifier);
+			nvgpu_set_error_notifier(ch, error_notifier);
 		}
 	}
 }
@@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
 {
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+			 NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
 	nvgpu_err(g,
 		   "gr semaphore timeout");
 	return -EINVAL;
@@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
 {
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+			 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 	/* This is an unrecoverable error, reset is needed */
 	nvgpu_err(g,
 		   "gr semaphore timeout");
@@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
 			isr_data->data_lo);
 	if (ret) {
 		gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+			 NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 		nvgpu_err(g, "invalid method class 0x%08x"
 			", offset 0x%08x address 0x%08x",
 			isr_data->class_num, isr_data->offset, isr_data->addr);
@@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g,
 {
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+			 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g,
 		   "invalid class 0x%08x, offset 0x%08x",
 		   isr_data->class_num, isr_data->offset);
@@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 
 	if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
 		gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD);
+			 NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
 		nvgpu_err(g,
 			  "firmware method error 0x%08x for offset 0x%04x",
 			  gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
@@ -5215,7 +5216,7 @@ static int gk20a_gr_handle_class_error(struct gk20a *g,
 	gr_class_error =
 		gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
 	gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+			 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
 		"sub channel 0x%08x mme generated %d,"
 		" mme pc 0x%08xdata high %d priv status %d"
@@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g,
 	gk20a_dbg_fn("");
 
 	gk20a_gr_set_error_notifier(g, isr_data,
-			 NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+			 NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g,
 		   "firmware method 0x%08x, offset 0x%08x for channel %u",
 		   isr_data->class_num, isr_data->offset,
@@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g)
 		if (need_reset) {
 			nvgpu_err(g, "set gr exception notifier");
 			gk20a_gr_set_error_notifier(g, &isr_data,
-					 NVGPU_CHANNEL_GR_EXCEPTION);
+					 NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
 		}
 	}
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h
new file mode 100644
index 00000000..3e70c379
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVGPU_ERROR_NOTIFIER_H__
+#define __NVGPU_ERROR_NOTIFIER_H__
+
+#include <nvgpu/types.h>
+
+struct channel_gk20a;
+
+enum {
+	NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT = 0,
+	NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD,
+	NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY,
+	NVGPU_ERR_NOTIFIER_GR_EXCEPTION,
+	NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT,
+	NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY,
+	NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT,
+	NVGPU_ERR_NOTIFIER_PBDMA_ERROR,
+	NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD,
+	NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR,
+	NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH,
+};
+
+void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error);
+void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error);
+void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error);
+bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier);
+
+#endif /* __NVGPU_ERROR_NOTIFIER_H__ */
-- 
cgit v1.2.2