From 89e0745fa024891b988508c3baa20c453230a80b Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 2 Apr 2018 19:10:42 +0530
Subject: gpu: nvgpu: handle misaligned_addr SM exception

Currently the misaligned_addr SM exception is not handled explicitly, and hence
we incorrectly initiate CILP on this exception.

Handle this exception explicitly, in this sequence:
- set the error notifier first
- clear the interrupt
- return an error from gr_gv11b_handle_warp_esr_error_misaligned_addr() so that
  RC recovery is triggered by gk20a_gr_isr()

Ensure that the error value is propagated back to gk20a_gr_isr() correctly by
accumulating each handler's return value into ret instead of overwriting it.

Use nvgpu_set_error_notifier_if_empty() to set the error notifier, since this
prevents the error notifier value from being overwritten in case gk20a_gr_isr()
also tries to write an error notifier value.

Bug 200388475
Jira NVGPU-554

Change-Id: I84c4d202a8068e738567ccd344e05d9d5f6ad2f0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1686781
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c                 | 14 ++++----
 drivers/gpu/nvgpu/gv100/hal_gv100.c                |  2 +-
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c                 | 40 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c                |  2 +-
 .../gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h |  4 +++
 .../gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h |  4 +++
 6 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c6a58fec..680b1637 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5740,7 +5740,7 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 				"GPC%d TPC%d: SM%d exception pending",
 				 gpc, tpc, sm);
 
-			ret = g->ops.gr.handle_sm_exception(g,
+			ret |= g->ops.gr.handle_sm_exception(g,
 				 gpc, tpc, sm, post_event, fault_ch,
 				hww_global_esr);
 			/* clear the hwws, also causes tpc and gpc
@@ -5759,11 +5759,11 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v()) {
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d TPC%d: TEX exception pending", gpc, tpc);
-		ret = g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event);
+		ret |= g->ops.gr.handle_tex_exception(g, gpc, tpc, post_event);
 	}
 
 	if (g->ops.gr.handle_tpc_mpc_exception)
-		ret = g->ops.gr.handle_tpc_mpc_exception(g,
+		ret |= g->ops.gr.handle_tpc_mpc_exception(g,
 					gpc, tpc, post_event);
 
 	return ret;
@@ -5801,7 +5801,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				  "GPC%d: TPC%d exception pending", gpc, tpc);
 
-			ret = gk20a_gr_handle_tpc_exception(g, gpc, tpc,
+			ret |= gk20a_gr_handle_tpc_exception(g, gpc, tpc,
 					post_event, fault_ch, hww_global_esr);
 
 		}
@@ -5812,7 +5812,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			int gcc_ret = 0;
 			gcc_ret = g->ops.gr.handle_gcc_exception(g, gpc, tpc,
 				post_event, fault_ch, hww_global_esr);
-			ret = ret ? ret : gcc_ret;
+			ret |= ret ? ret : gcc_ret;
 		}
 
 		/* Handle GPCCS exceptions */
@@ -5820,7 +5820,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			int ret_ecc = 0;
 			ret_ecc = g->ops.gr.handle_gpc_gpccs_exception(g, gpc,
 								gpc_exception);
-			ret = ret ? ret : ret_ecc;
+			ret |= ret ? ret : ret_ecc;
 		}
 
 		/* Handle GPCMMU exceptions */
@@ -5829,7 +5829,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 
 			ret_mmu = g->ops.gr.handle_gpc_gpcmmu_exception(g, gpc,
 								gpc_exception);
-			ret = ret ? ret : ret_mmu;
+			ret |= ret ? ret : ret_mmu;
 		}
 
 	}
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index f0187dab..b38260a5 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -517,7 +517,7 @@ static const struct gpu_ops gv100_ops = {
 		.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
-		.set_error_notifier = nvgpu_set_error_notifier,
+		.set_error_notifier = nvgpu_set_error_notifier_if_empty,
 		.setup_sw = gk20a_init_fifo_setup_sw,
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 		.alloc_syncpt_buf = gv11b_fifo_alloc_syncpt_buf,
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 7f6d1906..c43c6e83 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -30,6 +30,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/fuse.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
@@ -2090,6 +2091,41 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
 	return 0;
 }
 
+static int gr_gv11b_handle_warp_esr_error_misaligned_addr(struct gk20a *g,
+	u32 gpc, u32 tpc, u32 sm,
+	u32 warp_esr,
+	struct channel_gk20a *fault_ch)
+{
+	struct tsg_gk20a *tsg;
+	u32 offset;
+	struct channel_gk20a *ch_tsg;
+
+	if (fault_ch) {
+		tsg = &g->fifo.tsg[fault_ch->tsgid];
+
+		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+		nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
+				channel_gk20a, ch_entry) {
+			if (gk20a_channel_get(ch_tsg)) {
+				g->ops.fifo.set_error_notifier(ch_tsg,
+						 NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
+				gk20a_channel_put(ch_tsg);
+			}
+		}
+		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+	}
+
+	/* clear the interrupt by writing 0 to this SM's hww_warp_esr register */
+	offset = gk20a_gr_gpc_offset(g, gpc) +
+			gk20a_gr_tpc_offset(g, tpc) +
+			gv11b_gr_sm_offset(g, sm);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
+
+	/* always return an error so that gk20a_gr_isr() triggers RC recovery */
+	return -EFAULT;
+}
+
 /* @brief pre-process work on the SM exceptions to determine if we clear them or not.
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
@@ -2118,6 +2154,10 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
 				warp_esr, fault_ch);
 
+	if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f())
+		return gr_gv11b_handle_warp_esr_error_misaligned_addr(g, gpc, tpc, sm,
+				warp_esr, fault_ch);
+
 	if (fault_ch) {
 		tsg = tsg_gk20a_from_ch(fault_ch);
 		if (!tsg)
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 2d6dc9b0..dd4bd55a 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -534,7 +534,7 @@ static const struct gpu_ops gv11b_ops = {
 		.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
-		.set_error_notifier = nvgpu_set_error_notifier,
+		.set_error_notifier = nvgpu_set_error_notifier_if_empty,
 		.setup_sw = gk20a_init_fifo_setup_sw,
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 		.alloc_syncpt_buf = gv11b_fifo_alloc_syncpt_buf,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
index 8e475895..f5f09cdf 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -3632,6 +3632,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v(void)
 {
 	return 0x00000000U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
+{
+	return 0xfU;
+}
 static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
 {
 	return 0x20U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 4458265d..f7968089 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -4392,6 +4392,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
 {
 	return 0x0U;
 }
+static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
+{
+	return 0xfU;
+}
 static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
 {
 	return 0x20U;
-- 
cgit v1.2.2