author    | Deepak Nibade <dnibade@nvidia.com> | 2015-12-18 02:05:04 -0500
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:52:10 -0500
commit    | de47308b2c2ef2d24951a7e1c4ece9964417c167 (patch)
tree      | e35cf4a956fb2580cd63f50cdf9d422b2d0763df /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent    | 095bd5e59d896ebab12af25ac05aa4071257ecb1 (diff)
gpu: nvgpu: add CILP support for gp10b
Add CILP support for gp10b by defining the function pointers below
(each step is explained here; a condensed sketch of the flow follows
the list).

pre_process_sm_exception()
- for CILP-enabled channels, get the mask of errors
- if the stop_trigger needs to be broadcast, suspend all SMs
- otherwise suspend only the current SM
- clear the hww_global_esr values in h/w
- gr_gp10b_set_cilp_preempt_pending()
  - get the ctx_id
  - using the sideband method, program FECS to generate an
    interrupt on the next ctxsw
  - disable and preempt the channel/TSG
  - set cilp_preempt_pending = true
- clear single-step mode
- resume the current SM

handle_fecs_error()
- ctxsw_intr1 is raised on the next ctxsw
- clear this interrupt
- get the handle of the channel on which the SM exception was
  first triggered
- gr_gp10b_clear_cilp_preempt_pending()
  - set cilp_preempt_pending = false
- send events to the channel and the debug session fd
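The sketch below is an editor's condensation of the flow just described, using
the helper names from the patch further down. It is illustrative only: it omits
error handling, register programming, and locking, and it compiles only inside
the driver tree, so treat it as a reading aid rather than the actual code.

```c
/*
 * Illustrative sketch only (not part of the patch): the CILP flow
 * described above. The real implementation is in the diff below.
 */

/* SM exception side: what pre_process_sm_exception() does for a
 * CILP-enabled channel with the SM debugger attached. */
static void cilp_sm_exception_flow_sketch(struct gk20a *g, u32 gpc, u32 tpc,
					  u32 global_esr,
					  struct channel_gk20a *fault_ch)
{
	u32 global_mask = 0;	/* mask of the SM errors of interest */

	/* broadcast the stop_trigger if needed, else stop only this SM */
	if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch))
		gk20a_suspend_all_sms(g, global_mask, false);
	else
		gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);

	/* clear the HWW errors in h/w once the SM(s) are locked down */
	gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);

	/* arm the CILP preemption: look up ctx_id, program FECS via the
	 * sideband method to raise an interrupt on the next ctxsw,
	 * disable and preempt the channel/TSG, and set
	 * cilp_preempt_pending = true */
	gr_gp10b_set_cilp_preempt_pending(g, fault_ch);

	/* clear single-step mode, then resume the faulting SM */
	gk20a_resume_single_sm(g, gpc, tpc);
}

/* FECS side: what handle_fecs_error() does when ctxsw_intr1 fires on
 * the next ctxsw, after clearing the interrupt and finding the channel
 * that first triggered the SM exception. */
static void cilp_fecs_intr_flow_sketch(struct gk20a *g,
				       struct channel_gk20a *ch)
{
	gr_gp10b_clear_cilp_preempt_pending(g, ch);	/* pending = false */
	gk20a_dbg_gpu_post_events(ch);			/* debug session fd */
	gk20a_channel_post_event(ch);			/* channel event */
}
```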
Bug 200156699
Change-Id: Ia765db47e68fb968fada6409609af505c079df53
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/925897
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 314
1 file changed, 314 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a13b9a2c..91adf20c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,6 +19,7 @@
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -657,6 +658,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	(*gr_ctx)->t18x.ctx_id_valid = false;
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
 		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -1224,6 +1227,314 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	ret = gk20a_disable_channel_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"CILP: failed to disable channel/TSG!\n");
+		return ret;
+	}
+
+	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"CILP: failed to restart runlist 0!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
+
+	if (gk20a_is_channel_marked_as_tsg(fault_ch))
+		gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
+	else
+		gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
+
+	return ret;
+}
+
+static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret;
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already pending for chid %d",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	/* get ctx_id from the ucode image */
+	if (!gr_ctx->t18x.ctx_id_valid) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: looking up ctx id");
+		ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
+			return ret;
+		}
+		gr_ctx->t18x.ctx_id_valid = true;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
+
+	/* send ucode method to set ctxsw interrupt */
+	ret = gr_gk20a_submit_fecs_sideband_method_op(g,
+			(struct fecs_method_op_gk20a) {
+			.method.data = gr_ctx->t18x.ctx_id,
+			.method.addr =
+			gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
+			.mailbox = {
+			.id = 1 /* sideband */, .data = 0,
+			.clr = ~0, .ret = NULL,
+			.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+			.fail = 0},
+			.cond.ok = GR_IS_UCODE_OP_EQUAL,
+			.cond.fail = GR_IS_UCODE_OP_SKIP});
+
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to enable ctxsw interrupt!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: enabled ctxsw completion interrupt");
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: disabling channel %d",
+			fault_ch->hw_chid);
+
+	ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to disable channel!!");
+		return ret;
+	}
+
+	/* set cilp_preempt_pending = true and record the channel */
+	gr_ctx->t18x.cilp_preempt_pending = true;
+	g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
+
+	return 0;
+}
+
+static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
+		struct channel_gk20a *fault_ch)
+{
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	/* The ucode is self-clearing, so all we need to do here is
+	   to clear cilp_preempt_pending. */
+	if (!gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already cleared for chid %d\n",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	gr_ctx->t18x.cilp_preempt_pending = false;
+	g->gr.t18x.cilp_preempt_pending_chid = -1;
+
+	return 0;
+}
+
+/* @brief pre-process work on the SM exceptions to determine if we clear them or not.
+ *
+ * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
+ */
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
+		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+		bool *early_exit, bool *ignore_debugger)
+{
+	int ret;
+	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
+			NVGPU_GR_PREEMPTION_MODE_CILP) ;
+	u32 global_mask = 0, dbgr_control0, global_esr_copy;
+	u32 offset = proj_gpc_stride_v() * gpc +
+			proj_tpc_in_gpc_stride_v() * tpc;
+
+	*early_exit = false;
+	*ignore_debugger = false;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
+			gpc, tpc, global_esr);
+
+	if (cilp_enabled && sm_debugger_attached) {
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
+
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
+
+		global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
+
+		if (warp_esr != 0 || (global_esr & global_mask) != 0) {
+			*ignore_debugger = true;
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
+					gpc, tpc);
+
+			if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_all_sms(g, global_mask, false);
+
+				gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
+			} else {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
+			}
+
+			/* reset the HWW errors after locking down */
+			global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: HWWs cleared for gpc %d tpc %d\n",
+					gpc, tpc);
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
+			ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
+			if (ret) {
+				gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
+				return ret;
+			}
+
+			dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
+			if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
+						gpc, tpc);
+				dbgr_control0 = set_field(dbgr_control0,
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
+				gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
+			}
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: resume for gpc %d tpc %d\n",
+					gpc, tpc);
+			gk20a_resume_single_sm(g, gpc, tpc);
+
+			*ignore_debugger = true;
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
+		}
+
+		*early_exit = true;
+	}
+	return 0;
+}
+
+static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
+{
+	struct gr_ctx_desc *gr_ctx;
+	struct channel_gk20a *ch;
+	int chid;
+	int ret = -EINVAL;
+
+	chid = g->gr.t18x.cilp_preempt_pending_chid;
+
+	ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
+	if (!ch)
+		return ret;
+
+	gr_ctx = ch->ch_ctx.gr_ctx;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		*__chid = chid;
+		ret = 0;
+	}
+
+	gk20a_channel_put(ch);
+
+	return ret;
+}
+
+static int gr_gp10b_handle_fecs_error(struct gk20a *g,
+		struct channel_gk20a *__ch,
+		struct gr_gk20a_isr_data *isr_data)
+{
+	u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
+	struct channel_gk20a *ch;
+	int chid = -1;
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	/*
+	 * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
+	 * indicates that a CILP ctxsw save has finished
+	 */
+	if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: ctxsw save completed!\n");
+
+		/* now clear the interrupt */
+		gk20a_writel(g, gr_fecs_host_int_clear_r(),
+				gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
+
+		ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
+		if (ret)
+			goto clean_up;
+
+		ch = gk20a_channel_get(
+				gk20a_fifo_channel_from_hw_chid(g, chid));
+		if (!ch)
+			goto clean_up;
+
+
+		/* set preempt_pending to false */
+		ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n");
+			gk20a_channel_put(ch);
+			goto clean_up;
+		}
+
+		if (gk20a_gr_sm_debugger_attached(g)) {
+			gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
+			gk20a_dbg_gpu_post_events(ch);
+			gk20a_channel_post_event(ch);
+		}
+
+		gk20a_channel_put(ch);
+	}
+
+clean_up:
+	/* handle any remaining interrupts */
+	return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
+}
+
 static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 {
 	if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
@@ -1267,4 +1578,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
 	gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
+	gops->gr.pre_process_sm_exception =
+		gr_gp10b_pre_process_sm_exception;
+	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 }
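The hooks installed above are consumed by the common gk20a ISR paths rather than
by code in this file, so the call site is not part of this diff. As a rough,
hypothetical illustration of how the generic SM exception handler is expected to
use the new pointer (only the hook's signature is taken from the patch; the
surrounding variable names and control flow are assumptions), the pattern would
look like:

```c
/* Hypothetical caller sketch -- not part of the patch above. The common
 * gk20a SM exception handler consults the chip-specific hook before its
 * default handling. */
bool early_exit = false, ignore_debugger = false;
int ret = 0;

if (g->ops.gr.pre_process_sm_exception) {
	ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
			global_esr, warp_esr,
			sm_debugger_attached, fault_ch,
			&early_exit, &ignore_debugger);
	/* on the CILP path early_exit is set and the faulting SM has
	 * already been resumed, so the generic handling is skipped */
	if (early_exit)
		return ret;
}
```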