From 403874fa75dbb00e974a8d0f88b6e92be01ba42e Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 6 Feb 2017 15:44:55 -0800
Subject: gpu: nvgpu: refactor interrupt handling

JIRA: EVLR-1004

(*) Refactor the non-stalling interrupt path to clear interrupts in the
top half, so that in the dGPU case the processing of stalling interrupts
does not block non-stalling ones.
(*) Use a worker thread to do semaphore wakeups and allow batching of
the non-stalling operations.
(*) Fix a bug where some GPUs did not properly track the completion
of interrupts, preventing safe driver unloads.

Change-Id: Icc90a3acba544c97ec6a9285ab235d337ab9eefa
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1312796
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Lakshmanan M <lm@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Navneet Kumar <navneetk@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c  | 11 ++--
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h  |  2 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c |  6 +--
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h |  2 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c      | 22 +++++---
 drivers/gpu/nvgpu/gk20a/gk20a.h      | 15 +++++-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c   |  8 +--
 drivers/gpu/nvgpu/gk20a/mc_gk20a.c   | 98 ++++++++++++++++++++++--------------
 drivers/gpu/nvgpu/gk20a/mc_gk20a.h   |  5 +-
 drivers/gpu/nvgpu/gm20b/mc_gm20b.c   |  3 +-
 drivers/gpu/nvgpu/gp10b/ce_gp10b.c   | 19 +++----
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c   | 72 +++++++++++---------------
 drivers/gpu/nvgpu/pci.c              |  4 +-
 13 files changed, 140 insertions(+), 127 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index db1ac539..3fed937e 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -76,8 +76,9 @@ void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 	return;
 }
 
-void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 {
+	int ops = 0;
 	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
 
 	gk20a_dbg(gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);
@@ -85,12 +86,10 @@ void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 	if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) {
 		gk20a_writel(g, ce2_intr_status_r(),
 			ce2_nonblockpipe_isr(g, ce2_intr));
-
-		/* wake threads waiting in this channel */
-		gk20a_channel_semaphore_wakeup(g, true);
+		ops |= (gk20a_nonstall_ops_wakeup_semaphore |
+			gk20a_nonstall_ops_post_events);
 	}
-
-	return;
+	return ops;
 }
 void gk20a_init_ce2(struct gpu_ops *gops)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 1bb25dd1..5cdd233e 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -26,7 +26,7 @@
 
 void gk20a_init_ce2(struct gpu_ops *gops);
 void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
-void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
+int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
 
 /* CE command utility macros */
 #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index b2efc1fa..04d68872 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2377,7 +2377,7 @@ void gk20a_fifo_isr(struct gk20a *g)
 	return;
 }
 
-void gk20a_fifo_nonstall_isr(struct gk20a *g)
+int gk20a_fifo_nonstall_isr(struct gk20a *g)
 {
 	u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
 	u32 clear_intr = 0;
@@ -2389,9 +2389,7 @@ void gk20a_fifo_nonstall_isr(struct gk20a *g)
 
 	gk20a_writel(g, fifo_intr_0_r(), clear_intr);
 
-	gk20a_channel_semaphore_wakeup(g, false);
-
-	return;
+	return gk20a_nonstall_ops_wakeup_semaphore;
 }
 
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 37808928..c67ab456 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -214,7 +214,7 @@ int gk20a_init_fifo_support(struct gk20a *g);
 int gk20a_init_fifo_setup_hw(struct gk20a *g);
 
 void gk20a_fifo_isr(struct gk20a *g);
-void gk20a_fifo_nonstall_isr(struct gk20a *g);
+int gk20a_fifo_nonstall_isr(struct gk20a *g);
 
 int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);
 int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index d07918b0..e995dcbf 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -697,12 +697,6 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
 	return g->ops.mc.isr_thread_stall(g);
 }
 
-static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
-{
-	struct gk20a *g = dev_id;
-	return g->ops.mc.isr_thread_nonstall(g);
-}
-
 void gk20a_remove_support(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
@@ -717,6 +711,12 @@ void gk20a_remove_support(struct device *dev)
 
 	gk20a_channel_cancel_pending_sema_waits(g);
 
+	if (g->nonstall_work_queue) {
+		cancel_work_sync(&g->nonstall_fn_work);
+		destroy_workqueue(g->nonstall_work_queue);
+		g->nonstall_work_queue = NULL;
+	}
+
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
 
@@ -932,6 +932,13 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	if (g->ops.clk.disable_slowboot)
 		g->ops.clk.disable_slowboot(g);
 
+	/* Enable interrupt workqueue */
+	if (!g->nonstall_work_queue) {
+		g->nonstall_work_queue = alloc_workqueue("%s",
+						WQ_HIGHPRI, 1, "mc_nonstall");
+		INIT_WORK(&g->nonstall_fn_work, g->ops.mc.isr_nonstall_cb);
+	}
+
 	gk20a_enable_priv_ring(g);
 
 	/* TBD: move this after graphics init in which blcg/slcg is enabled.
@@ -1617,10 +1624,9 @@ static int gk20a_probe(struct platform_device *dev)
 				gk20a->irq_stall);
 		return err;
 	}
-	err = devm_request_threaded_irq(&dev->dev,
+	err = devm_request_irq(&dev->dev,
 			gk20a->irq_nonstall,
 			gk20a_intr_isr_nonstall,
-			gk20a_intr_thread_nonstall,
 			0, "gk20a_nonstall", gk20a);
 	if (err) {
 		dev_err(&dev->dev,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 12c9a3ea..092bf7ae 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -155,7 +155,7 @@ struct gpu_ops {
 	} ltc;
 	struct {
 		void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base);
-		void (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base);
+		int (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base);
 	} ce2;
 	struct {
 		int (*init_fs_state)(struct gk20a *g);
@@ -735,7 +735,8 @@ struct gpu_ops {
 		irqreturn_t (*isr_stall)(struct gk20a *g);
 		irqreturn_t (*isr_nonstall)(struct gk20a *g);
 		irqreturn_t (*isr_thread_stall)(struct gk20a *g);
-		irqreturn_t (*isr_thread_nonstall)(struct gk20a *g);
+		void (*isr_thread_nonstall)(struct gk20a *g, u32 intr);
+		void (*isr_nonstall_cb)(struct work_struct *work);
 		u32 intr_mask_restore[4];
 	} mc;
 	struct {
@@ -848,6 +849,10 @@ struct gk20a {
 	atomic_t usage_count;
 	int driver_is_dying;
 
+	atomic_t nonstall_ops;
+	struct work_struct nonstall_fn_work;
+	struct workqueue_struct *nonstall_work_queue;
+
 	struct resource *reg_mem;
 	void __iomem *regs;
 	void __iomem *regs_saved;
@@ -1151,6 +1156,12 @@ enum gk20a_dbg_categories {
 	gpu_dbg_mem     = BIT(31), /* memory accesses, very verbose */
 };
 
+/* Bit flags for deferred work that non-stall ISRs request from the workqueue */
+enum gk20a_nonstall_ops {
+	gk20a_nonstall_ops_wakeup_semaphore = BIT(0), /* wake up semaphore */
+	gk20a_nonstall_ops_post_events = BIT(1), /* 2nd arg of semaphore wakeup */
+};
+
 extern u32 gk20a_dbg_mask;
 #ifdef CONFIG_GK20A_TRACE_PRINTK
 extern u32 gk20a_dbg_ftrace;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3ab63862..afa665ab 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6582,6 +6582,7 @@ int gk20a_gr_isr(struct gk20a *g)
 
 int gk20a_gr_nonstall_isr(struct gk20a *g)
 {
+	int ops = 0;
 	u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
 
 	gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
@@ -6590,11 +6591,10 @@ int gk20a_gr_nonstall_isr(struct gk20a *g)
 		/* Clear the interrupt */
 		gk20a_writel(g, gr_intr_nonstall_r(),
 			gr_intr_nonstall_trap_pending_f());
-		/* Wakeup all the waiting channels */
-		gk20a_channel_semaphore_wakeup(g, true);
+		ops |= (gk20a_nonstall_ops_wakeup_semaphore |
+			gk20a_nonstall_ops_post_events);
 	}
-
-	return 0;
+	return ops;
 }
 
 int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
index 65f1a119..caab6b5e 100644
--- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
@@ -21,6 +21,24 @@
 
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 
+void mc_gk20a_nonstall_cb(struct work_struct *work)
+{ /* work handler: runs the ops accumulated in g->nonstall_ops */
+	struct gk20a *g = container_of(work, struct gk20a, nonstall_fn_work);
+	u32 ops; /* bitmask of enum gk20a_nonstall_ops */
+	bool semaphore_wakeup, post_events;
+
+	do {
+		ops = atomic_xchg(&g->nonstall_ops, 0); /* take pending ops */
+
+		semaphore_wakeup = ops & gk20a_nonstall_ops_wakeup_semaphore;
+		post_events = ops & gk20a_nonstall_ops_post_events;
+
+		if (semaphore_wakeup) /* post_events alone does nothing */
+			gk20a_channel_semaphore_wakeup(g, post_events);
+
+	} while (atomic_read(&g->nonstall_ops) != 0); /* new ops set? */
+}
+
 irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
 {
 	u32 mc_intr_0;
@@ -51,6 +69,7 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
 irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g)
 {
 	u32 mc_intr_1;
+	u32 hw_irq_count;
 
 	if (!g->power_on)
 		return IRQ_NONE;
@@ -66,9 +85,23 @@ irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g)
 	/* flush previous write */
 	gk20a_readl(g, mc_intr_en_1_r());
 
-	atomic_inc(&g->hw_irq_nonstall_count);
+	if (g->ops.mc.isr_thread_nonstall)
+		g->ops.mc.isr_thread_nonstall(g, mc_intr_1);
 
-	return IRQ_WAKE_THREAD;
+	hw_irq_count = atomic_inc_return(&g->hw_irq_nonstall_count);
+
+	/* sync handled irq counter before re-enabling interrupts */
+	atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count);
+
+	gk20a_writel(g, mc_intr_en_1_r(),
+		mc_intr_en_1_inta_hardware_f());
+
+	/* flush previous write */
+	gk20a_readl(g, mc_intr_en_1_r());
+
+	wake_up_all(&g->sw_irq_nonstall_last_handled_wq);
+
+	return IRQ_HANDLED;
 }
 
 irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
@@ -137,59 +170,47 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
 	return IRQ_HANDLED;
 }
 
-irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
+void mc_gk20a_intr_thread_nonstall(struct gk20a *g, u32 mc_intr_1)
 {
-	u32 mc_intr_1;
-	int hw_irq_count;
 	u32 engine_id_idx;
 	u32 active_engine_id = 0;
 	u32 engine_enum = ENGINE_INVAL_GK20A;
-
-	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
-
-	mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
-	hw_irq_count = atomic_read(&g->hw_irq_nonstall_count);
-
-	gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
+	int ops_old, ops_new, ops = 0;
 
 	if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
-		gk20a_fifo_nonstall_isr(g);
-	if (mc_intr_1 & mc_intr_0_priv_ring_pending_f())
-		gk20a_priv_ring_isr(g);
+		ops |= gk20a_fifo_nonstall_isr(g);
 
-	for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
+	for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines;
+							engine_id_idx++) {
 		active_engine_id = g->fifo.active_engines_list[engine_id_idx];
 
-		if (mc_intr_1 & g->fifo.engine_info[active_engine_id].intr_mask) {
+		if (mc_intr_1 &
+			g->fifo.engine_info[active_engine_id].intr_mask) {
 			engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
 			/* GR Engine */
-			if (engine_enum == ENGINE_GR_GK20A) {
-				gk20a_gr_nonstall_isr(g);
-			}
+			if (engine_enum == ENGINE_GR_GK20A)
+				ops |= gk20a_gr_nonstall_isr(g);
 
 			/* CE Engine */
 			if (((engine_enum == ENGINE_GRCE_GK20A) ||
 				(engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
-				g->ops.ce2.isr_nonstall) {
-					g->ops.ce2.isr_nonstall(g,
-					g->fifo.engine_info[active_engine_id].inst_id,
-					g->fifo.engine_info[active_engine_id].pri_base);
-			}
+				g->ops.ce2.isr_nonstall)
+					ops |= g->ops.ce2.isr_nonstall(g,
+					g->fifo.engine_info[active_engine_id].
+								inst_id,
+					g->fifo.engine_info[active_engine_id].
+								pri_base);
 		}
 	}
-
-	/* sync handled irq counter before re-enabling interrupts */
-	atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count);
-
-	gk20a_writel(g, mc_intr_en_1_r(),
-		mc_intr_en_1_inta_hardware_f());
-
-	/* flush previous write */
-	gk20a_readl(g, mc_intr_en_1_r());
-
-	wake_up_all(&g->sw_irq_nonstall_last_handled_wq);
-
-	return IRQ_HANDLED;
+	if (ops) {
+		do {
+			ops_old = atomic_read(&g->nonstall_ops);
+			ops_new  = ops_old | ops;
+		} while (ops_old != atomic_cmpxchg(&g->nonstall_ops,
+						ops_old, ops_new));
+
+		queue_work(g->nonstall_work_queue, &g->nonstall_fn_work);
+	}
 }
 
 void mc_gk20a_intr_enable(struct gk20a *g)
@@ -237,4 +258,5 @@ void gk20a_init_mc(struct gpu_ops *gops)
 	gops->mc.isr_nonstall = mc_gk20a_isr_nonstall;
 	gops->mc.isr_thread_stall = mc_gk20a_intr_thread_stall;
 	gops->mc.isr_thread_nonstall = mc_gk20a_intr_thread_nonstall;
+	gops->mc.isr_nonstall_cb = mc_gk20a_nonstall_cb;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/mc_gk20a.h
index 4bb3e118..1aad1a0b 100644
--- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -22,5 +22,6 @@ void mc_gk20a_intr_unit_config(struct gk20a *g, bool enable,
 irqreturn_t mc_gk20a_isr_stall(struct gk20a *g);
 irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g);
 irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g);
-irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g);
+void mc_gk20a_intr_thread_nonstall(struct gk20a *g, u32 intr);
+void mc_gk20a_nonstall_cb(struct work_struct *work);
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/mc_gm20b.c b/drivers/gpu/nvgpu/gm20b/mc_gm20b.c
index 1d2d78e3..c8a42cd7 100644
--- a/drivers/gpu/nvgpu/gm20b/mc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mc_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GK20A memory interface
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -27,4 +27,5 @@ void gm20b_init_mc(struct gpu_ops *gops)
 	gops->mc.isr_nonstall = mc_gk20a_isr_nonstall;
 	gops->mc.isr_thread_stall = mc_gk20a_intr_thread_stall;
 	gops->mc.isr_thread_nonstall = mc_gk20a_intr_thread_nonstall;
+	gops->mc.isr_nonstall_cb = mc_gk20a_nonstall_cb;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
index f19a4b0f..169309fa 100644
--- a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * Pascal GPU series Copy Engine.
  *
- * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -22,15 +22,6 @@
 
 #include <nvgpu/hw/gp10b/hw_ce_gp10b.h>
 
-static void ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
-{
-	gk20a_dbg(gpu_dbg_intr, "ce non-blocking pipe interrupt\n");
-
-	/* wake theads waiting in this channel */
-	gk20a_channel_semaphore_wakeup(g, true);
-	return;
-}
-
 static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
 	gk20a_dbg(gpu_dbg_intr, "ce blocking pipe interrupt\n");
@@ -63,8 +54,9 @@ static void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 	return;
 }
 
-static void gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+static int gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 {
+	int ops = 0;
 	u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id));
 
 	gk20a_dbg(gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
@@ -72,10 +64,11 @@ static void gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 	if (ce_intr & ce_intr_status_nonblockpipe_pending_f()) {
 		gk20a_writel(g, ce_intr_status_r(inst_id),
 			ce_intr_status_nonblockpipe_pending_f());
-		ce_nonblockpipe_isr(g, ce_intr);
+		ops |= (gk20a_nonstall_ops_wakeup_semaphore |
+			gk20a_nonstall_ops_post_events);
 	}
 
-	return;
+	return ops;
 }
 void gp10b_init_ce(struct gpu_ops *gops)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 3f066c37..dfcbe398 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
+#include "gk20a/mc_gk20a.h"
 
 #include "mc_gp10b.h"
 
@@ -80,12 +81,15 @@ irqreturn_t mc_gp10b_isr_stall(struct gk20a *g)
 
 	gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
 
+	atomic_inc(&g->hw_irq_stall_count);
+
 	return IRQ_WAKE_THREAD;
 }
 
 irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
 {
 	u32 mc_intr_1;
+	u32 hw_irq_count;
 
 	if (!g->power_on)
 		return IRQ_NONE;
@@ -97,12 +101,27 @@ irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
 
 	gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
 
-	return IRQ_WAKE_THREAD;
+	if (g->ops.mc.isr_thread_nonstall)
+		g->ops.mc.isr_thread_nonstall(g, mc_intr_1);
+
+	hw_irq_count = atomic_inc_return(&g->hw_irq_nonstall_count);
+
+	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
+			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
+
+	/* sync handled irq counter before re-enabling interrupts */
+	atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count);
+
+	wake_up_all(&g->sw_irq_nonstall_last_handled_wq);
+
+	return IRQ_HANDLED;
 }
 
 irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 {
 	u32 mc_intr_0;
+	int hw_irq_count;
+
 	u32 engine_id_idx;
 	u32 active_engine_id = 0;
 	u32 engine_enum = ENGINE_INVAL_GK20A;
@@ -110,6 +129,7 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
 	mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
+	hw_irq_count = atomic_read(&g->hw_irq_stall_count);
 
 	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
 
@@ -146,51 +166,13 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 	if (mc_intr_0 & mc_intr_ltc_pending_f())
 		g->ops.ltc.isr(g);
 
+	/* sync handled irq counter before re-enabling interrupts */
+	atomic_set(&g->sw_irq_stall_last_handled, hw_irq_count);
+
 	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_STALLING),
 			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING]);
 
-	return IRQ_HANDLED;
-}
-
-irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
-{
-	u32 mc_intr_1;
-	u32 engine_id_idx;
-	u32 active_engine_id = 0;
-	u32 engine_enum = ENGINE_INVAL_GK20A;
-
-	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
-
-	mc_intr_1 = gk20a_readl(g, mc_intr_r(1));
-
-	gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
-
-	if (mc_intr_1 & mc_intr_pfifo_pending_f())
-		gk20a_fifo_nonstall_isr(g);
-
-	for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
-		active_engine_id = g->fifo.active_engines_list[engine_id_idx];
-
-		if (mc_intr_1 & g->fifo.engine_info[active_engine_id].intr_mask) {
-			engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
-			/* GR Engine */
-			if (engine_enum == ENGINE_GR_GK20A) {
-				gk20a_gr_nonstall_isr(g);
-			}
-
-			/* CE Engine */
-			if (((engine_enum == ENGINE_GRCE_GK20A) ||
-				(engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
-				g->ops.ce2.isr_nonstall) {
-					g->ops.ce2.isr_nonstall(g,
-					g->fifo.engine_info[active_engine_id].inst_id,
-					g->fifo.engine_info[active_engine_id].pri_base);
-			}
-		}
-	}
-
-	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
-			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
+	wake_up_all(&g->sw_irq_stall_last_handled_wq);
 
 	return IRQ_HANDLED;
 }
@@ -202,5 +184,7 @@ void gp10b_init_mc(struct gpu_ops *gops)
 	gops->mc.isr_stall = mc_gp10b_isr_stall;
 	gops->mc.isr_nonstall = mc_gp10b_isr_nonstall;
 	gops->mc.isr_thread_stall = mc_gp10b_intr_thread_stall;
-	gops->mc.isr_thread_nonstall = mc_gp10b_intr_thread_nonstall;
+	gops->mc.isr_thread_nonstall = mc_gk20a_intr_thread_nonstall;
+	gops->mc.isr_nonstall_cb = mc_gk20a_nonstall_cb;
+
 }
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index 39559dac..69e16267 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -236,8 +236,7 @@ static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id)
 		g->ops.xve.rearm_msi(g);
 #endif
 
-	return (ret_stall == IRQ_NONE && ret_nonstall == IRQ_NONE) ?
-		IRQ_NONE : IRQ_WAKE_THREAD;
+	return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD;
 }
 
 static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
@@ -245,7 +244,6 @@ static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
 	struct gk20a *g = dev_id;
 
 	g->ops.mc.isr_thread_stall(g);
-	g->ops.mc.isr_thread_nonstall(g);
 
 	return IRQ_HANDLED;
 }
-- 
cgit v1.2.2