From de47308b2c2ef2d24951a7e1c4ece9964417c167 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Fri, 18 Dec 2015 12:35:04 +0530
Subject: gpu: nvgpu: add CILP support for gp10b

Add CILP support for gp10b by defining the function pointers
below (with a detailed explanation of each):

pre_process_sm_exception()
- for CILP-enabled channels, get the mask of errors
- if we need to broadcast the stop_trigger, suspend all SMs;
  otherwise suspend only the current SM
- clear the hww_global_esr values in h/w
- gr_gp10b_set_cilp_preempt_pending()
  - get the ctx_id
  - using the sideband method, program FECS to generate an
    interrupt on the next ctxsw
  - disable and preempt the channel/TSG
  - set cilp_preempt_pending = true
- clear single-step mode
- resume the current SM

handle_fecs_error()
- we get ctxsw_intr1 upon the next ctxsw
- clear this interrupt
- get the handle of the channel on which we first triggered
  the SM exception
- gr_gp10b_clear_cilp_preempt_pending()
  - set cilp_preempt_pending = false
- send events to the channel and the debug session fd

Bug 200156699

Change-Id: Ia765db47e68fb968fada6409609af505c079df53
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/925897
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 314 +++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a13b9a2c..91adf20c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,6 +19,7 @@
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -657,6 +658,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	(*gr_ctx)->t18x.ctx_id_valid = false;
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
 		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -1224,6 +1227,314 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	ret = gk20a_disable_channel_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to disable channel/TSG!\n");
+		return ret;
+	}
+
+	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to restart runlist 0!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
+
+	if (gk20a_is_channel_marked_as_tsg(fault_ch))
+		gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
+	else
+		gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
+
+	return ret;
+}
+
+static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret;
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already pending for chid %d",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	/* get ctx_id from the ucode image */
+	if (!gr_ctx->t18x.ctx_id_valid) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: looking up ctx id"); + ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id); + if (ret) { + gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n"); + return ret; + } + gr_ctx->t18x.ctx_id_valid = true; + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id); + + /* send ucode method to set ctxsw interrupt */ + ret = gr_gk20a_submit_fecs_sideband_method_op(g, + (struct fecs_method_op_gk20a) { + .method.data = gr_ctx->t18x.ctx_id, + .method.addr = + gr_fecs_method_push_adr_configure_interrupt_completion_option_v(), + .mailbox = { + .id = 1 /* sideband */, .data = 0, + .clr = ~0, .ret = NULL, + .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), + .fail = 0}, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP}); + + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to enable ctxsw interrupt!"); + return ret; + } + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: enabled ctxsw completion interrupt"); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP: disabling channel %d", + fault_ch->hw_chid); + + ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "CILP: failed to disable channel!!"); + return ret; + } + + /* set cilp_preempt_pending = true and record the channel */ + gr_ctx->t18x.cilp_preempt_pending = true; + g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid; + + return 0; +} + +static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, + struct channel_gk20a *fault_ch) +{ + struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); + + if (!gr_ctx) + return -EINVAL; + + /* The ucode is self-clearing, so all we need to do here is + to clear cilp_preempt_pending. */ + if (!gr_ctx->t18x.cilp_preempt_pending) { + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, + "CILP is already cleared for chid %d\n", + fault_ch->hw_chid); + return 0; + } + + gr_ctx->t18x.cilp_preempt_pending = false; + g->gr.t18x.cilp_preempt_pending_chid = -1; + + return 0; +} + +/* @brief pre-process work on the SM exceptions to determine if we clear them or not. 
+ *
+ * On Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing
+ */
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
+		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+		bool *early_exit, bool *ignore_debugger)
+{
+	int ret;
+	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
+			NVGPU_GR_PREEMPTION_MODE_CILP);
+	u32 global_mask = 0, dbgr_control0, global_esr_copy;
+	u32 offset = proj_gpc_stride_v() * gpc +
+			proj_tpc_in_gpc_stride_v() * tpc;
+
+	*early_exit = false;
+	*ignore_debugger = false;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
+			gpc, tpc, global_esr);
+
+	if (cilp_enabled && sm_debugger_attached) {
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
+
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
+
+		global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
+
+		if (warp_esr != 0 || (global_esr & global_mask) != 0) {
+			*ignore_debugger = true;
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
+					gpc, tpc);
+
+			if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_all_sms(g, global_mask, false);
+
+				gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
+			} else {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
+			}
+
+			/* reset the HWW errors after locking down */
+			global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: HWWs cleared for gpc %d tpc %d\n",
+					gpc, tpc);
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
+			ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
+			if (ret) {
+				gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
+				return ret;
+			}
+
+			dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
+			if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
+						gpc, tpc);
+				dbgr_control0 = set_field(dbgr_control0,
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
+				gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
+			}
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: resume for gpc %d tpc %d\n",
+					gpc, tpc);
+			gk20a_resume_single_sm(g, gpc, tpc);
+
+			*ignore_debugger = true;
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
+		}
+
+		*early_exit = true;
+	}
+	return 0;
+}
+
+static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
+{
+	struct gr_ctx_desc *gr_ctx;
+	struct channel_gk20a *ch;
+	int chid;
+	int ret = -EINVAL;
+
+	chid = g->gr.t18x.cilp_preempt_pending_chid;
+
+	ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
+	if (!ch)
+		return ret;
+
+	gr_ctx = ch->ch_ctx.gr_ctx;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		*__chid = chid;
+		ret = 0;
+	}
+
+	gk20a_channel_put(ch);
+
+	return ret;
+}
+
+static int gr_gp10b_handle_fecs_error(struct gk20a *g,
+		struct channel_gk20a *__ch,
+		struct gr_gk20a_isr_data *isr_data)
+{
+	u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
+	struct channel_gk20a *ch;
+	int chid = -1;
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	/*
+	 * INTR1 (bit 1 of HOST_INT_STATUS_CTXSW_INTR, hence the value 2)
+	 * indicates that a CILP ctxsw save has finished
+	 */
+	if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: ctxsw save completed!\n");
+
+		/* now clear the interrupt */
+		gk20a_writel(g, gr_fecs_host_int_clear_r(),
+				gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
+
+		ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
+		if (ret)
+			goto clean_up;
+
+		ch = gk20a_channel_get(
+				gk20a_fifo_channel_from_hw_chid(g, chid));
+		if (!ch)
+			goto clean_up;
+
+		/* set preempt_pending to false */
+		ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error while clearing CILP preempt pending!\n");
+			gk20a_channel_put(ch);
+			goto clean_up;
+		}
+
+		if (gk20a_gr_sm_debugger_attached(g)) {
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: posting usermode event");
+			gk20a_dbg_gpu_post_events(ch);
+			gk20a_channel_post_event(ch);
+		}
+
+		gk20a_channel_put(ch);
+	}
+
+clean_up:
+	/* handle any remaining interrupts */
+	return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
+}
+
 static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 {
 	if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
@@ -1267,4 +1578,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
 	gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
+	gops->gr.pre_process_sm_exception =
+		gr_gp10b_pre_process_sm_exception;
+	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 }
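
For reference, the control flow added by this patch is a two-phase
hand-off: phase 1 (pre_process_sm_exception) arms a CILP preempt when an
SM exception hits a CILP-enabled channel, and phase 2 (handle_fecs_error)
completes it when FECS raises ctxsw_intr1 after the save. The sketch below
is a minimal, self-contained model of just that bookkeeping, with all
hardware access stubbed out as prints; the names cilp_state,
on_sm_exception() and on_ctxsw_save_done() are hypothetical and are not
part of nvgpu.

/*
 * Standalone model of the CILP preempt-pending hand-off (illustrative
 * only, not driver code).  Build with: cc -o cilp cilp.c
 */
#include <stdbool.h>
#include <stdio.h>

struct cilp_state {
	bool pending;		/* models gr_ctx->t18x.cilp_preempt_pending */
	int pending_chid;	/* models g->gr.t18x.cilp_preempt_pending_chid */
};

/* Phase 1: SM exception on a CILP channel (cf. pre_process_sm_exception) */
static int on_sm_exception(struct cilp_state *s, int chid)
{
	if (s->pending) {
		/* mirrors the "CILP is already pending" early return */
		printf("CILP already pending for chid %d\n", s->pending_chid);
		return 0;
	}
	printf("chid %d: program FECS ctxsw interrupt, disable + preempt\n",
	       chid);
	s->pending = true;		/* arm the preempt */
	s->pending_chid = chid;		/* remember the faulting channel */
	return 0;
}

/* Phase 2: FECS ctxsw_intr1 after the save (cf. handle_fecs_error) */
static int on_ctxsw_save_done(struct cilp_state *s)
{
	if (!s->pending)
		return -1;	/* no preempt outstanding; cf. -EINVAL above */
	printf("chid %d: ctxsw save complete, post events to debugger\n",
	       s->pending_chid);
	s->pending = false;
	s->pending_chid = -1;
	return 0;
}

int main(void)
{
	struct cilp_state s = { .pending = false, .pending_chid = -1 };

	on_sm_exception(&s, 3);		/* exception arms the preempt */
	on_sm_exception(&s, 3);		/* a second exception is a no-op */
	on_ctxsw_save_done(&s);		/* interrupt completes the hand-off */
	return 0;
}

Note that the pending chid is a single global slot
(g->gr.t18x.cilp_preempt_pending_chid), so only one CILP preempt can be
outstanding per GPU at a time; the per-context cilp_preempt_pending flag
is what gr_gp10b_get_cilp_preempt_pending_chid() re-validates before the
interrupt handler trusts that global chid.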