From 0b02c8589dcc507865a8fd398431c45fbda2ba9c Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Tue, 26 Jun 2018 08:53:15 -0700 Subject: Revert: GV11B runlist preemption patches This reverts commit 2d397e34a5aafb5feed406a13f3db536eadae5bb. This reverts commit cd6e821cf66837a2c3479e928414007064b9c496. This reverts commit 5cf1eb145fef763f7153e449be60f1a7602e2c81. This reverts commit a8d6f31bde3ccef22ee77023eaff4a62f6f88199. This reverts commit 067ddbc4e4df3f1f756f03e7865c369a46f420aa. This reverts commit 3eede64de058fcb1e39d723dd146bcd5d06c6f43. This reverts commit 1407133b7e1b27a92ee8c116009541904d2ff691. This reverts commit 797dde3e32647df3b616cea67f4defae59d38b3f. Looks like this makes the ap_compute test on embedded-qnx-hv e3550-t194 quite bad. Might also affect ap_resmgr. Signed-off-by: Alex Waterman Change-Id: Ib9f06514d554d1a67993f0f2bd3d180147385e0a Reviewed-on: https://git-master.nvidia.com/r/1761864 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 15 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 10 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 6 +- drivers/gpu/nvgpu/gv100/mc_gv100.c | 16 +- drivers/gpu/nvgpu/gv100/mc_gv100.h | 3 +- drivers/gpu/nvgpu/gv11b/fb_gv11b.c | 79 ++---- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 469 ++++++++++++----------------------- drivers/gpu/nvgpu/gv11b/fifo_gv11b.h | 10 +- drivers/gpu/nvgpu/gv11b/mc_gv11b.c | 16 +- drivers/gpu/nvgpu/gv11b/mc_gv11b.h | 3 +- 10 files changed, 230 insertions(+), 397 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index dc1f48b7..00119300 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -55,7 +55,9 @@ #define FECS_METHOD_WFI_RESTORE 0x80000 #define FECS_MAILBOX_0_ACK_RESTORE 0x4 - +static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, + u32 chid, bool add, + bool wait_for_finish); static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); static const char *const pbdma_intr_fault_type_desc[] = { @@ -2702,7 +2704,7 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg) } int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type) + unsigned int id_type, unsigned int timeout_rc_type) { struct nvgpu_timeout timeout; u32 delay = GR_IDLE_CHECK_DEFAULT; @@ -2775,8 +2777,8 @@ int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) id_type = is_tsg ? 
ID_TYPE_TSG : ID_TYPE_CHANNEL; /* wait for preempt */ - ret = g->ops.fifo.is_preempt_pending(g, id, id_type); - + ret = g->ops.fifo.is_preempt_pending(g, id, id_type, + PREEMPT_TIMEOUT_RC); return ret; } @@ -3273,7 +3275,7 @@ void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id, fifo_eng_runlist_length_f(count)); } -int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, +static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, u32 chid, bool add, bool wait_for_finish) { @@ -3446,7 +3448,8 @@ static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch, gk20a_readl(g, fifo_preempt_r())); #endif if (wait_preempt) { - g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type); + g->ops.fifo.is_preempt_pending( + g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC); } #ifdef TRACEPOINTS_ENABLED trace_gk20a_reschedule_preempted_next(ch->chid); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 77030c94..bccd15f6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -50,6 +50,9 @@ enum { #define ID_TYPE_TSG 1 #define ID_TYPE_UNKNOWN ((u32)~0) +#define PREEMPT_TIMEOUT_RC 1 +#define PREEMPT_TIMEOUT_NORC 0 + #define RC_YES 1 #define RC_NO 0 @@ -254,9 +257,6 @@ int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next, int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid, bool add, bool wait_for_finish); -int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, - u32 chid, bool add, - bool wait_for_finish); int gk20a_fifo_suspend(struct gk20a *g); bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); @@ -390,8 +390,8 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a); u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g); -int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type); +int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type, + unsigned int timeout_rc_type); int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg); void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, unsigned int id_type); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index fac02f68..9061236e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -662,9 +662,9 @@ struct gpu_ops { struct ch_state *ch_state); u32 (*intr_0_error_mask)(struct gk20a *g); int (*is_preempt_pending)(struct gk20a *g, u32 id, - unsigned int id_type); + unsigned int id_type, unsigned int timeout_rc_type); int (*preempt_ch_tsg)(struct gk20a *g, u32 id, - unsigned int id_type); + unsigned int id_type, unsigned int timeout_rc_type); void (*init_pbdma_intr_descs)(struct fifo_gk20a *f); int (*reset_enable_hw)(struct gk20a *g); int (*setup_userd)(struct channel_gk20a *c); @@ -1109,7 +1109,7 @@ struct gpu_ops { bool (*is_intr_hub_pending)(struct gk20a *g, u32 mc_intr); bool (*is_intr_nvlink_pending)(struct gk20a *g, u32 mc_intr); bool (*is_stall_and_eng_intr_pending)(struct gk20a *g, - u32 act_eng_id, u32 *eng_intr_pending); + u32 act_eng_id); u32 (*intr_stall)(struct gk20a *g); void (*intr_stall_pause)(struct gk20a *g); void (*intr_stall_resume)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.c b/drivers/gpu/nvgpu/gv100/mc_gv100.c index 2d84a3a8..7ed9e6da 100644 --- a/drivers/gpu/nvgpu/gv100/mc_gv100.c +++ b/drivers/gpu/nvgpu/gv100/mc_gv100.c @@ -72,14 +72,15 @@ bool gv100_mc_is_intr_nvlink_pending(struct gk20a 
*g, u32 mc_intr_0) return (((mc_intr_0 & mc_intr_nvlink_pending_f()) != 0U) ? true : false); } -bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id, - u32 *eng_intr_pending) +bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id) { u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0)); u32 stall_intr, eng_intr_mask; eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id); - *eng_intr_pending = mc_intr_0 & eng_intr_mask; + if ((mc_intr_0 & eng_intr_mask) != 0U) { + return true; + } stall_intr = mc_intr_pfifo_pending_f() | mc_intr_hub_pending_f() | @@ -87,10 +88,9 @@ bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id, mc_intr_pbus_pending_f() | mc_intr_ltc_pending_f() | mc_intr_nvlink_pending_f(); + if ((mc_intr_0 & stall_intr) != 0U) { + return true; + } - nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, - "mc_intr_0 = 0x%08x, eng_intr = 0x%08x", - mc_intr_0 & stall_intr, *eng_intr_pending); - - return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U; + return false; } diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.h b/drivers/gpu/nvgpu/gv100/mc_gv100.h index e9069258..4aff4a36 100644 --- a/drivers/gpu/nvgpu/gv100/mc_gv100.h +++ b/drivers/gpu/nvgpu/gv100/mc_gv100.h @@ -26,6 +26,5 @@ struct gk20a; void mc_gv100_intr_enable(struct gk20a *g); bool gv100_mc_is_intr_nvlink_pending(struct gk20a *g, u32 mc_intr_0); -bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id, - u32 *eng_intr_pending); +bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id); #endif diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c index 2ceb816b..54f0d2d8 100644 --- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c @@ -870,11 +870,10 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g, static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, struct mmu_fault_info *mmfault, u32 *invalidate_replay_val) { - unsigned int id_type = ID_TYPE_UNKNOWN; + unsigned int id_type; u32 num_lce, act_eng_bitmask = 0; int err = 0; - u32 id = FIFO_INVAL_TSG_ID; - unsigned int rc_type = RC_TYPE_NO_RC; + u32 id = ((u32)~0); if (!mmfault->valid) return; @@ -889,23 +888,18 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, /* CE page faults are not reported as replayable */ nvgpu_log(g, gpu_dbg_intr, "CE Faulted"); err = gv11b_fb_fix_page_fault(g, mmfault); - if (mmfault->refch && - (u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) { - gv11b_fifo_reset_pbdma_and_eng_faulted(g, - &g->fifo.tsg[mmfault->refch->tsgid], - mmfault->faulted_pbdma, - mmfault->faulted_engine); - } + gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch, + mmfault->faulted_pbdma, mmfault->faulted_engine); if (!err) { nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed"); *invalidate_replay_val = 0; - if (mmfault->refch) { - gk20a_channel_put(mmfault->refch); - mmfault->refch = NULL; - } + /* refch in mmfault is assigned at the time of copying + * fault info from snap reg or bar2 fault buf + */ + gk20a_channel_put(mmfault->refch); return; } - /* Do recovery */ + /* Do recovery. 
Channel recovery needs refch */ nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed"); } @@ -917,9 +911,16 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, * instance block, the fault cannot be isolated to a * single context so we need to reset the entire runlist */ - rc_type = RC_TYPE_MMU_FAULT; + id_type = ID_TYPE_UNKNOWN; } else if (mmfault->refch) { + if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) { + id = mmfault->refch->tsgid; + id_type = ID_TYPE_TSG; + } else { + id = mmfault->chid; + id_type = ID_TYPE_CHANNEL; + } if (mmfault->refch->mmu_nack_handled) { /* We have already recovered for the same * context, skip doing another recovery. @@ -940,40 +941,19 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, */ gk20a_channel_put(mmfault->refch); return; - } else { - /* Indicate recovery is handled if mmu fault is - * a result of mmu nack. - */ - mmfault->refch->mmu_nack_handled = true; - } - - rc_type = RC_TYPE_MMU_FAULT; - if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) { - id = mmfault->refch->tsgid; - if (id != FIFO_INVAL_TSG_ID) - id_type = ID_TYPE_TSG; - } else { - nvgpu_err(g, "bare channels not supported"); } + } else { + id_type = ID_TYPE_UNKNOWN; } - - /* engine is faulted */ - if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) { + if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) act_eng_bitmask = BIT(mmfault->faulted_engine); - rc_type = RC_TYPE_MMU_FAULT; - } - /* refch in mmfault is assigned at the time of copying - * fault info from snap reg or bar2 fault buf + /* Indicate recovery is handled if mmu fault is a result of + * mmu nack. */ - if (mmfault->refch) { - gk20a_channel_put(mmfault->refch); - mmfault->refch = NULL; - } - - if (rc_type != RC_TYPE_NO_RC) - g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, - id, id_type, rc_type, mmfault); + mmfault->refch->mmu_nack_handled = true; + g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask, + id, id_type, RC_TYPE_MMU_FAULT, mmfault); } else { if (mmfault->fault_type == gmmu_fault_type_pte_v()) { nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix"); @@ -992,10 +972,7 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g, /* refch in mmfault is assigned at the time of copying * fault info from snap reg or bar2 fault buf */ - if (mmfault->refch) { - gk20a_channel_put(mmfault->refch); - mmfault->refch = NULL; - } + gk20a_channel_put(mmfault->refch); } } @@ -1084,10 +1061,8 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g, next_fault_addr = mmfault->fault_addr; if (prev_fault_addr == next_fault_addr) { nvgpu_log(g, gpu_dbg_intr, "pte already scanned"); - if (mmfault->refch) { + if (mmfault->refch) gk20a_channel_put(mmfault->refch); - mmfault->refch = NULL; - } continue; } } diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index 1fe98c35..9c64675f 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -381,24 +381,17 @@ u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g) u32 gv11b_fifo_get_preempt_timeout(struct gk20a *g) { - /* if timeouts are enabled, using 3000ms timeout - * for polling pdma/eng/runlist might kick in - * timeout handler in the cases where preempt - * is stuck. Use 1000ms timeout for polling when - * timeouts are enabled */ - return nvgpu_is_timeouts_enabled(g) ? 
PREEMPT_TIMEOUT_1000_MS : - g->gr_idle_timeout_default; + return gk20a_get_gr_idle_timeout(g); } static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id, - u32 pbdma_id) + u32 pbdma_id, unsigned int timeout_rc_type) { struct nvgpu_timeout timeout; unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */ u32 pbdma_stat; u32 chan_stat; int ret = -EBUSY; - unsigned int loop_count = 0; /* timeout in milli seconds */ nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g), @@ -407,14 +400,6 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id, nvgpu_log(g, gpu_dbg_info, "wait preempt pbdma %d", pbdma_id); /* Verify that ch/tsg is no longer on the pbdma */ do { - if (!nvgpu_platform_is_silicon(g)) { - if (loop_count >= MAX_PRE_SI_RETRIES) { - nvgpu_err(g, "preempt pbdma retries: %u", - loop_count); - break; - } - loop_count++; - } /* * If the PBDMA has a stalling interrupt and receives a NACK, * the PBDMA won't save out until the STALLING interrupt is @@ -467,24 +452,21 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id, nvgpu_usleep_range(delay, delay * 2); delay = min_t(unsigned long, delay << 1, GR_IDLE_CHECK_MAX); - } while (!nvgpu_timeout_expired(&timeout)); - - if (ret) - nvgpu_err(g, "preempt timeout pbdma: %u pbdma_stat: %u " - "tsgid: %u", pbdma_id, pbdma_stat, id); + } while (!nvgpu_timeout_expired_msg(&timeout, + "preempt timeout pbdma")); return ret; } static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, - u32 act_eng_id, u32 *reset_eng_bitmask) + u32 act_eng_id, u32 *reset_eng_bitmask, + unsigned int timeout_rc_type) { struct nvgpu_timeout timeout; unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */ u32 eng_stat; u32 ctx_stat; int ret = -EBUSY; - unsigned int loop_count = 0; - u32 eng_intr_pending; + bool stall_intr = false; /* timeout in milli seconds */ nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g), @@ -494,56 +476,20 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, act_eng_id); /* Check if ch/tsg has saved off the engine or if ctxsw is hung */ do { - if (!nvgpu_platform_is_silicon(g)) { - if (loop_count >= MAX_PRE_SI_RETRIES) { - nvgpu_err(g, "preempt eng retries: %u", - loop_count); - break; - } - loop_count++; - } eng_stat = gk20a_readl(g, fifo_engine_status_r(act_eng_id)); ctx_stat = fifo_engine_status_ctx_status_v(eng_stat); - if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id, - &eng_intr_pending)) { - /* From h/w team - * Engine save can be blocked by eng stalling interrupts. - * FIFO interrupts shouldn’t block an engine save from - * finishing, but could block FIFO from reporting preempt done. - * No immediate reason to reset the engine if FIFO interrupt is - * pending. - * The hub, priv_ring, and ltc interrupts could block context - * switch (or memory), but doesn’t necessarily have to. - * For Hub interrupts they just report access counters and page - * faults. Neither of these necessarily block context switch - * or preemption, but they could. - * For example a page fault for graphics would prevent graphics - * from saving out. An access counter interrupt is a - * notification and has no effect. - * SW should handle page faults though for preempt to complete. - * PRI interrupt (due to a failed PRI transaction) will result - * in ctxsw failure reported to HOST. - * LTC interrupts are generally ECC related and if so, - * certainly don’t block preemption/ctxsw but they could. 
- * Bus interrupts shouldn’t have anything to do with preemption - * state as they are part of the Host EXT pipe, though they may - * exhibit a symptom that indicates that GPU is in a bad state. - * To be completely fair, when an engine is preempting SW - * really should just handle other interrupts as they come in. - * It’s generally bad to just poll and wait on a preempt - * to complete since there are many things in the GPU which may - * cause a system to hang/stop responding. - */ + if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id)) { + stall_intr = true; nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, "stall intr set, " - "preemption might not finish"); + "preemption will not finish"); } if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) { /* Eng save hasn't started yet. Continue polling */ - if (eng_intr_pending) { - /* if eng intr, stop polling */ + if (stall_intr) { + /* if stall intr stop polling */ *reset_eng_bitmask |= BIT(act_eng_id); ret = 0; break; @@ -555,7 +501,8 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, fifo_engine_status_ctx_status_ctxsw_save_v()) { if (id == fifo_engine_status_id_v(eng_stat)) { - if (eng_intr_pending) { + if (stall_intr || + timeout_rc_type == PREEMPT_TIMEOUT_NORC) { /* preemption will not finish */ *reset_eng_bitmask |= BIT(act_eng_id); ret = 0; @@ -571,7 +518,9 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, fifo_engine_status_ctx_status_ctxsw_load_v()) { if (id == fifo_engine_status_next_id_v(eng_stat)) { - if (eng_intr_pending) { + + if (stall_intr || + timeout_rc_type == PREEMPT_TIMEOUT_NORC) { /* preemption will not finish */ *reset_eng_bitmask |= BIT(act_eng_id); ret = 0; @@ -591,21 +540,8 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id, nvgpu_usleep_range(delay, delay * 2); delay = min_t(unsigned long, delay << 1, GR_IDLE_CHECK_MAX); - } while (!nvgpu_timeout_expired(&timeout)); - - if (ret) { - /* - * The reasons a preempt can fail are: - * 1.Some other stalling interrupt is asserted preventing - * channel or context save. - * 2.The memory system hangs. - * 3.The engine hangs during CTXSW. 
- */ - nvgpu_err(g, "preempt timeout eng: %u ctx_stat: %u tsgid: %u", - act_eng_id, ctx_stat, id); - *reset_eng_bitmask |= BIT(act_eng_id); - } - + } while (!nvgpu_timeout_expired_msg(&timeout, + "preempt timeout eng")); return ret; } @@ -652,19 +588,29 @@ static void gv11b_reset_pbdma_faulted_tsg(struct tsg_gk20a *tsg) } void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g, - struct tsg_gk20a *tsg, + struct channel_gk20a *refch, u32 faulted_pbdma, u32 faulted_engine) { - if (!tsg) - return; + struct tsg_gk20a *tsg; nvgpu_log(g, gpu_dbg_intr, "reset faulted pbdma:0x%x eng:0x%x", faulted_pbdma, faulted_engine); - if (faulted_pbdma != FIFO_INVAL_PBDMA_ID) - gv11b_reset_pbdma_faulted_tsg(tsg); - if (faulted_engine != FIFO_INVAL_ENGINE_ID) - gv11b_reset_eng_faulted_tsg(tsg); + if (!refch) + return; + + if (gk20a_is_channel_marked_as_tsg(refch)) { + tsg = &g->fifo.tsg[refch->tsgid]; + if (faulted_pbdma != FIFO_INVAL_PBDMA_ID) + gv11b_reset_pbdma_faulted_tsg(tsg); + if (faulted_engine != FIFO_INVAL_ENGINE_ID) + gv11b_reset_eng_faulted_tsg(tsg); + } else { + if (faulted_pbdma != FIFO_INVAL_PBDMA_ID) + gv11b_reset_pbdma_faulted_ch(g, refch->chid); + if (faulted_engine != FIFO_INVAL_ENGINE_ID) + gv11b_reset_eng_faulted_ch(g, refch->chid); + } } static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask, @@ -674,7 +620,7 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask, u32 runlists_mask = 0; struct fifo_gk20a *f = &g->fifo; struct fifo_runlist_info_gk20a *runlist; - u32 rlid, pbdma_bitmask = 0; + u32 pbdma_bitmask = 0; if (id_type != ID_TYPE_UNKNOWN) { if (id_type == ID_TYPE_TSG) @@ -689,31 +635,31 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask, if (mmfault->faulted_pbdma != FIFO_INVAL_PBDMA_ID) pbdma_bitmask = BIT(mmfault->faulted_pbdma); - for (rlid = 0; rlid < f->max_runlists; rlid++) { + for (id = 0; id < f->max_runlists; id++) { - runlist = &f->runlist_info[rlid]; + runlist = &f->runlist_info[id]; if (runlist->eng_bitmask & act_eng_bitmask) runlists_mask |= - fifo_sched_disable_runlist_m(rlid); + fifo_sched_disable_runlist_m(id); if (runlist->pbdma_bitmask & pbdma_bitmask) runlists_mask |= - fifo_sched_disable_runlist_m(rlid); + fifo_sched_disable_runlist_m(id); } } if (id_type == ID_TYPE_UNKNOWN) { - for (rlid = 0; rlid < f->max_runlists; rlid++) { + for (id = 0; id < f->max_runlists; id++) { if (act_eng_bitmask) { /* eng ids are known */ - runlist = &f->runlist_info[rlid]; + runlist = &f->runlist_info[id]; if (runlist->eng_bitmask & act_eng_bitmask) runlists_mask |= - fifo_sched_disable_runlist_m(rlid); + fifo_sched_disable_runlist_m(id); } else { runlists_mask |= - fifo_sched_disable_runlist_m(rlid); + fifo_sched_disable_runlist_m(id); } } } @@ -745,20 +691,10 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g, struct nvgpu_timeout timeout; u32 delay = GR_IDLE_CHECK_DEFAULT; int ret = -EBUSY; - unsigned int loop_count = 0; nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g), NVGPU_TIMER_CPU_TIMER); do { - if (!nvgpu_platform_is_silicon(g)) { - if (loop_count >= MAX_PRE_SI_RETRIES) { - nvgpu_err(g, "preempt runlist retries: %u", - loop_count); - break; - } - loop_count++; - } - if (!((gk20a_readl(g, fifo_runlist_preempt_r())) & runlists_mask)) { ret = 0; @@ -768,16 +704,13 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g, nvgpu_usleep_range(delay, delay * 2); delay = min_t(unsigned long, delay << 1, GR_IDLE_CHECK_MAX); - } while 
(!nvgpu_timeout_expired(&timeout)); - - if (ret) - nvgpu_err(g, "preempt runlist timeout, runlists_mask:0x%08x", - runlists_mask); + } while (!nvgpu_timeout_expired_msg(&timeout, + "runlist preempt timeout")); return ret; } int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type) + unsigned int id_type, unsigned int timeout_rc_type) { struct fifo_gk20a *f = &g->fifo; unsigned long runlist_served_pbdmas; @@ -785,6 +718,7 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id, u32 pbdma_id; u32 act_eng_id; u32 runlist_id; + int func_ret; int ret = 0; u32 tsgid; @@ -801,14 +735,30 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id, runlist_served_pbdmas = f->runlist_info[runlist_id].pbdma_bitmask; runlist_served_engines = f->runlist_info[runlist_id].eng_bitmask; - for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma) - ret |= gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id); + for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma) { + + func_ret = gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id, + timeout_rc_type); + if (func_ret != 0) { + nvgpu_log_info(g, "preempt timeout pbdma %d", pbdma_id); + ret |= func_ret; + } + } f->runlist_info[runlist_id].reset_eng_bitmask = 0; - for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines) - ret |= gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id, - &f->runlist_info[runlist_id].reset_eng_bitmask); + for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines) { + + func_ret = gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id, + &f->runlist_info[runlist_id].reset_eng_bitmask, + timeout_rc_type); + + if (func_ret != 0) { + nvgpu_log_info(g, "preempt timeout engine %d", act_eng_id); + ret |= func_ret; + } + } + return ret; } @@ -887,9 +837,6 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock); - /* WAR for Bug 2065990 */ - gk20a_fifo_disable_tsg_sched(g, &f->tsg[tsgid]); - mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); ret = __locked_fifo_preempt(g, tsgid, true); @@ -897,9 +844,6 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) if (!mutex_ret) nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - /* WAR for Bug 2065990 */ - gk20a_fifo_enable_tsg_sched(g, &f->tsg[tsgid]); - nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock); if (ret) @@ -908,36 +852,44 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) return ret; } -static void gv11b_fifo_locked_preempt_runlists(struct gk20a *g, u32 runlists_mask) +static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask) { int ret = 0; u32 token = PMU_INVALID_MUTEX_OWNER_ID; u32 mutex_ret = 0; - u32 rlid; + u32 runlist_id; + + nvgpu_log_fn(g, " "); - /* runlist_lock are locked by teardown and sched are disabled too */ - nvgpu_log_fn(g, "preempt runlists_mask:0x%08x", runlists_mask); + for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) { + if (runlists_mask & fifo_runlist_preempt_runlist_m(runlist_id)) + nvgpu_mutex_acquire(&g->fifo. 
+ runlist_info[runlist_id].runlist_lock); + } mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); ret = __locked_fifo_preempt_runlists(g, runlists_mask); - if (ret) { - /* if preempt timed out, reset engs served by runlists */ - for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) { - if (runlists_mask & - fifo_runlist_preempt_runlist_m(rlid)) - g->fifo.runlist_info[rlid].reset_eng_bitmask = - g->fifo.runlist_info[rlid].eng_bitmask; + if (!mutex_ret) + nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + + for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) { + if (runlists_mask & + fifo_runlist_preempt_runlist_m(runlist_id)) { + /* during recovery reset engs served by this runlist */ + g->fifo.runlist_info[runlist_id].reset_eng_bitmask = + g->fifo.runlist_info[runlist_id].eng_bitmask; + nvgpu_mutex_release(&g->fifo. + runlist_info[runlist_id].runlist_lock); } } - if (!mutex_ret) - nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + return ret; } static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, - unsigned int id_type) + unsigned int id_type, unsigned int timeout_rc_type) { int ret; struct fifo_gk20a *f = &g->fifo; @@ -951,97 +903,52 @@ static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, gk20a_fifo_issue_preempt(g, id, true); /* wait for preempt */ - ret = g->ops.fifo.is_preempt_pending(g, id, id_type); + ret = g->ops.fifo.is_preempt_pending(g, id, id_type, + timeout_rc_type); - /* No recovery even if preempt timed out since - * this is called from recovery path - */ + if (ret && (timeout_rc_type == PREEMPT_TIMEOUT_RC)) + gk20a_fifo_preempt_timeout_rc(g, id, id_type); return ret; } int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, - unsigned int id_type) + unsigned int id_type, unsigned int timeout_rc_type) { + struct fifo_gk20a *f = &g->fifo; u32 ret = 0; u32 token = PMU_INVALID_MUTEX_OWNER_ID; u32 mutex_ret = 0; + u32 runlist_id; - mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - /* - * This is called from teardown path only. runlist_lock - * is already acquired before calling this function. 
- */ - ret = __locked_fifo_preempt_ch_tsg(g, id, id_type); - - if (!mutex_ret) - nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - - return ret; + if (id_type == ID_TYPE_TSG) + runlist_id = f->tsg[id].runlist_id; + else if (id_type == ID_TYPE_CHANNEL) + runlist_id = f->channel[id].runlist_id; + else + return -EINVAL; -} + if (runlist_id >= g->fifo.max_runlists) { + nvgpu_log_info(g, "runlist_id = %d", runlist_id); + return -EINVAL; + } -static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g, - unsigned int rc_type, - u32 runlists_mask) -{ - struct tsg_gk20a *tsg = NULL; - u32 rlid, tsgid; - struct fifo_runlist_info_gk20a *runlist = NULL; - u32 token = PMU_INVALID_MUTEX_OWNER_ID; - u32 mutex_ret = 0; - bool add = false, wait_for_finish = false; - int err; + nvgpu_log_fn(g, "preempt id = %d, runlist_id = %d", id, runlist_id); - nvgpu_err(g, "runlist id unknown, abort active tsgs in runlists"); + nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock); - /* runlist_lock are locked by teardown */ mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - for (rlid = 0; rlid < g->fifo.max_runlists; - rlid++) { - if (!(runlists_mask & BIT(rlid))) - continue; - nvgpu_log(g, gpu_dbg_info, "abort runlist id %d", - rlid); - runlist = &g->fifo.runlist_info[rlid]; - - for_each_set_bit(tsgid, runlist->active_tsgs, - g->fifo.num_channels) { - nvgpu_log(g, gpu_dbg_info, "abort tsg id %d", tsgid); - tsg = &g->fifo.tsg[tsgid]; - gk20a_disable_tsg(tsg); - - /* assume all pbdma and eng faulted are set */ - nvgpu_log(g, gpu_dbg_info, "reset pbdma and eng faulted"); - gv11b_reset_pbdma_faulted_tsg(tsg); - gv11b_reset_eng_faulted_tsg(tsg); + ret = __locked_fifo_preempt_ch_tsg(g, id, id_type, timeout_rc_type); -#ifdef CONFIG_GK20A_CTXSW_TRACE - gk20a_ctxsw_trace_tsg_reset(g, tsg); -#endif - if (!g->fifo.deferred_reset_pending) { - if (rc_type == RC_TYPE_MMU_FAULT) { - gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); - gk20a_fifo_error_tsg(g, tsg); - } - } + if (!mutex_ret) + nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - /* (chid == ~0 && !add) remove all act ch from runlist*/ - err = gk20a_fifo_update_runlist_locked(g, rlid, - FIFO_INVAL_CHANNEL_ID, add, wait_for_finish); - if (err) - nvgpu_err(g, "runlist id %d is not cleaned up", - rlid); + nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock); - gk20a_fifo_abort_tsg(g, tsg->tsgid, false); + return ret; - nvgpu_log(g, gpu_dbg_info, "aborted tsg id %d", tsgid); - } - } - if (!mutex_ret) - nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); } void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, @@ -1049,66 +956,10 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, struct mmu_fault_info *mmfault) { struct tsg_gk20a *tsg = NULL; - u32 runlists_mask, rlid; + struct channel_gk20a *refch = NULL; + u32 runlists_mask, runlist_id; struct fifo_runlist_info_gk20a *runlist = NULL; u32 engine_id, client_type = ~0; - struct fifo_gk20a *f = &g->fifo; - u32 runlist_id = FIFO_INVAL_RUNLIST_ID; - u32 num_runlists = 0; - - nvgpu_log_fn(g, "acquire runlist_lock for all runlists"); - for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) - nvgpu_mutex_acquire(&f->runlist_info[rlid]. 
- runlist_lock); - - /* get runlist id and tsg */ - if (id_type == ID_TYPE_TSG) { - if (id != FIFO_INVAL_TSG_ID) { - tsg = &g->fifo.tsg[id]; - runlist_id = tsg->runlist_id; - if (runlist_id != FIFO_INVAL_RUNLIST_ID) - num_runlists++; - else - nvgpu_log_fn(g, "tsg runlist id is invalid"); - } else { - nvgpu_log_fn(g, "id type is tsg but tsg id is inval"); - } - } else { - /* - * id type is unknown, get runlist_id if eng mask is such that - * it corresponds to single runlist id. If eng mask corresponds - * to multiple runlists, then abort all runlists - */ - for (rlid = 0; rlid < f->max_runlists; rlid++) { - if (act_eng_bitmask) { - /* eng ids are known */ - runlist = &f->runlist_info[rlid]; - if (runlist->eng_bitmask & act_eng_bitmask) { - runlist_id = rlid; - num_runlists++; - } - } else { - break; - } - } - if (num_runlists > 1 ) /* abort all runlists */ - runlist_id = FIFO_INVAL_RUNLIST_ID; - } - - /* if runlist_id is valid and there is only single runlist to be - * aborted, release runlist lock that are not - * needed for this recovery - */ - if (runlist_id != FIFO_INVAL_RUNLIST_ID && num_runlists == 1) { - for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) { - if (rlid != runlist_id) { - nvgpu_log_fn(g, "release runlist_lock for " - "unused runlist id: %d", rlid); - nvgpu_mutex_release(&f->runlist_info[rlid]. - runlist_lock); - } - } - } nvgpu_log(g, gpu_dbg_info, "id = %d, id_type = %d, rc_type = %d, " "act_eng_bitmask = 0x%x, mmfault ptr = 0x%p", @@ -1117,7 +968,6 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id, id_type, rc_type, mmfault); - /* Disable runlist scheduler */ gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_DISABLED); g->fifo.deferred_reset_pending = false; @@ -1139,41 +989,41 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); + /* Get tsg/ch */ if (rc_type == RC_TYPE_MMU_FAULT) { gk20a_debug_dump(g); + refch = mmfault->refch; client_type = mmfault->client_type; - gv11b_fifo_reset_pbdma_and_eng_faulted(g, tsg, - mmfault->faulted_pbdma, - mmfault->faulted_engine); + gv11b_fifo_reset_pbdma_and_eng_faulted(g, refch, + mmfault->faulted_pbdma, + mmfault->faulted_engine); } + if (id_type == ID_TYPE_TSG) { + tsg = &g->fifo.tsg[id]; + } else if (id_type == ID_TYPE_CHANNEL) { + if (refch == NULL) + refch = gk20a_channel_get(&g->fifo.channel[id]); + } + /* Disable tsg/ch */ if (tsg) gk20a_disable_tsg(tsg); + else if (refch) + g->ops.fifo.disable_channel(refch); - /* - * Even though TSG preempt timed out, the RC sequence would by design - * require s/w to issue another preempt. - * If recovery includes an ENGINE_RESET, to not have race conditions, - * use RUNLIST_PREEMPT to kick all work off, and cancel any context - * load which may be pending. This is also needed to make sure - * that all PBDMAs serving the engine are not loaded when engine is - * reset. 
- */ - if (tsg) { - int preempt_failed; - - preempt_failed = g->ops.fifo.preempt_ch_tsg(g, id, id_type); - if (preempt_failed) - gv11b_fifo_locked_preempt_runlists(g, runlists_mask); + /* Preempt tsg/ch */ + if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) { + g->ops.fifo.preempt_ch_tsg(g, id, id_type, + PREEMPT_TIMEOUT_NORC); } else { - gv11b_fifo_locked_preempt_runlists(g, runlists_mask); + gv11b_fifo_preempt_runlists(g, runlists_mask); } /* check if engine reset should be deferred */ - for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) { + for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) { - runlist = &g->fifo.runlist_info[rlid]; - if ((runlists_mask & BIT(rlid)) && + runlist = &g->fifo.runlist_info[runlist_id]; + if ((runlists_mask & BIT(runlist_id)) && runlist->reset_eng_bitmask) { unsigned long __reset_eng_bitmask = @@ -1181,7 +1031,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, for_each_set_bit(engine_id, &__reset_eng_bitmask, g->fifo.max_engines) { - if (tsg && + if ((refch || tsg) && gk20a_fifo_should_defer_engine_reset(g, engine_id, client_type, false)) { @@ -1213,9 +1063,13 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, } #ifdef CONFIG_GK20A_CTXSW_TRACE + /* tsg and refch both could be valid for mmu fault. Check tsg first */ if (tsg) gk20a_ctxsw_trace_tsg_reset(g, tsg); + else if (refch) + gk20a_ctxsw_trace_channel_reset(g, refch); #endif + if (tsg) { if (g->fifo.deferred_reset_pending) { gk20a_disable_tsg(tsg); @@ -1225,9 +1079,26 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, gk20a_fifo_abort_tsg(g, tsg->tsgid, false); } + if (refch) + gk20a_channel_put(refch); + } else if (refch) { + if (g->fifo.deferred_reset_pending) { + g->ops.fifo.disable_channel(refch); + } else { + if (rc_type == RC_TYPE_MMU_FAULT) + gk20a_fifo_set_ctx_mmu_error_ch(g, refch); + + gk20a_channel_abort(refch, false); + } + gk20a_channel_put(refch); } else { - gv11b_fifo_locked_abort_runlist_active_tsgs(g, rc_type, - runlists_mask); + nvgpu_err(g, "id unknown, abort runlist"); + for (runlist_id = 0; runlist_id < g->fifo.max_runlists; + runlist_id++) { + if (runlists_mask & BIT(runlist_id)) + g->ops.fifo.update_runlist(g, runlist_id, + FIFO_INVAL_CHANNEL_ID, false, true); + } } gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_ENABLED); @@ -1235,18 +1106,6 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, /* It is safe to enable ELPG again. */ if (g->support_pmu && g->elpg_enabled) nvgpu_pmu_enable_elpg(g); - - /* release runlist_lock */ - if (runlist_id != FIFO_INVAL_RUNLIST_ID) { - nvgpu_log_fn(g, "release runlist_lock runlist_id = %d", - runlist_id); - nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock); - } else { - nvgpu_log_fn(g, "release runlist_lock for all runlists"); - for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) - nvgpu_mutex_release(&f->runlist_info[rlid]. 
- runlist_lock); - } } void gv11b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f) diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h index aee7aef2..1ae3c93e 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h @@ -50,13 +50,10 @@ #define CHANNEL_INFO_VEID0 0 -#define MAX_PRE_SI_RETRIES 200000 /* 1G/500KHz * 100 */ -#define PREEMPT_TIMEOUT_1000_MS 1000 - struct gpu_ops; void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g, - struct tsg_gk20a *tsg, + struct channel_gk20a *refch, u32 faulted_pbdma, u32 faulted_engine); void gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(struct gk20a *g, u32 mmu_fault_id, u32 *active_engine_id, u32 *veid, u32 *pbdma_id); @@ -81,11 +78,12 @@ void gv11b_dump_eng_status(struct gk20a *g, u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g); int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next); int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id, - unsigned int id_type); + unsigned int id_type, unsigned int timeout_rc_type); int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid); int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid); int gv11b_fifo_enable_tsg(struct tsg_gk20a *tsg); -int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, unsigned int id_type); +int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, + unsigned int id_type, unsigned int timeout_rc_type); void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, u32 id, unsigned int id_type, unsigned int rc_type, struct mmu_fault_info *mmfault); diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c index dbeb0645..31600828 100644 --- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c @@ -71,24 +71,24 @@ bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0) return (((mc_intr_0 & mc_intr_hub_pending_f()) != 0U) ? true : false); } -bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id, - u32 *eng_intr_pending) +bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id) { u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0)); u32 stall_intr, eng_intr_mask; eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id); - *eng_intr_pending = mc_intr_0 & eng_intr_mask; + if ((mc_intr_0 & eng_intr_mask) != 0U) { + return true; + } stall_intr = mc_intr_pfifo_pending_f() | mc_intr_hub_pending_f() | mc_intr_priv_ring_pending_f() | mc_intr_pbus_pending_f() | mc_intr_ltc_pending_f(); + if ((mc_intr_0 & stall_intr) != 0U) { + return true; + } - nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, - "mc_intr_0 = 0x%08x, eng_intr = 0x%08x", - mc_intr_0 & stall_intr, *eng_intr_pending); - - return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U; + return false; } diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h index faa4d38d..eb9d0e4e 100644 --- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h @@ -26,6 +26,5 @@ struct gk20a; void mc_gv11b_intr_enable(struct gk20a *g); bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0); -bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id, - u32 *eng_intr_pending); +bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id); #endif -- cgit v1.2.2
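
For context, here is a minimal standalone sketch (not part of the patch; all types and function names below are stubs invented for illustration) of the call pattern this revert restores: is_preempt_pending() again takes an explicit timeout_rc_type argument, so the normal preempt path passes PREEMPT_TIMEOUT_RC and runs recovery on timeout, while the teardown/recovery path passes PREEMPT_TIMEOUT_NORC to avoid re-entering recovery.

```c
/*
 * Illustrative sketch only -- not code from the patch. The real callers in
 * nvgpu are __locked_fifo_preempt() and __locked_fifo_preempt_ch_tsg() in the
 * diff above; here everything is stubbed so the example compiles on its own.
 */
#include <stdio.h>

#define PREEMPT_TIMEOUT_RC   1   /* run recovery if the preempt times out */
#define PREEMPT_TIMEOUT_NORC 0   /* caller is already recovering; do not recurse */

struct gk20a { int dummy; };

/* Stub standing in for g->ops.fifo.is_preempt_pending(). */
static int is_preempt_pending(struct gk20a *g, unsigned int id,
			      unsigned int id_type,
			      unsigned int timeout_rc_type)
{
	(void)g; (void)id; (void)id_type; (void)timeout_rc_type;
	return -1;	/* pretend the preempt timed out */
}

/* Stub standing in for gk20a_fifo_preempt_timeout_rc(). */
static void preempt_timeout_rc(struct gk20a *g, unsigned int id,
			       unsigned int id_type)
{
	(void)g; (void)id_type;
	printf("recovering id %u after preempt timeout\n", id);
}

/* Normal preempt path: recover when the poll times out. */
static int preempt_normal(struct gk20a *g, unsigned int id,
			  unsigned int id_type)
{
	int ret = is_preempt_pending(g, id, id_type, PREEMPT_TIMEOUT_RC);

	if (ret)
		preempt_timeout_rc(g, id, id_type);
	return ret;
}

/* Teardown/recovery path: poll only, never trigger nested recovery. */
static int preempt_from_recovery(struct gk20a *g, unsigned int id,
				 unsigned int id_type)
{
	return is_preempt_pending(g, id, id_type, PREEMPT_TIMEOUT_NORC);
}

int main(void)
{
	struct gk20a g = { 0 };

	preempt_normal(&g, 7, 1);		/* 1 ~ ID_TYPE_TSG */
	preempt_from_recovery(&g, 7, 1);
	return 0;
}
```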