author	Deepak Nibade <dnibade@nvidia.com>	2016-07-20 03:09:12 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-08-10 14:17:58 -0400
commit	59a115f3fe6076de4c9af69de836cc82d6430544 (patch)
tree	18af9e6080ee1306b1737ac74f392d88150e7a3c /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent	118809f4bd07af20df2b6c012828834695a5fccf (diff)
gpu: nvgpu: post bpt events after processing
We currently post bpt events (bpt.int and bpt.pause) even before we
process and clear the interrupts, and this could cause races with the
UMD.

Fix this by posting bpt events only after we are done processing the
interrupts.

Bug 200209410

Change-Id: Ic3ff7148189fccb796cb6175d6d22ac25a4097fb
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1184109
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
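In effect, the patch latches the SM's global ESR value early, finishes all interrupt processing and clearing, and only then notifies userspace. A minimal standalone sketch of the ordering change follows; read_and_latch_global_esr(), process_and_clear_interrupts(), and post_bpt_events_to_umd() are hypothetical stand-ins for illustration, not functions from this driver:

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for the driver's helpers (illustration only). */
	static uint32_t read_and_latch_global_esr(void) { return 0x1u; }
	static void process_and_clear_interrupts(void) { puts("process and clear"); }
	static void post_bpt_events_to_umd(void) { puts("post bpt.int/bpt.pause"); }

	/* Old ordering: events were posted before the interrupts were
	 * processed and cleared, so a UMD woken by the event could race
	 * with the still-running handler. */
	static void gr_isr_old(void)
	{
		post_bpt_events_to_umd();
		process_and_clear_interrupts();
	}

	/* New ordering: latch the global ESR first, finish all processing
	 * and clearing, then post BPT events as the last step of the ISR. */
	static void gr_isr_new(void)
	{
		uint32_t global_esr = read_and_latch_global_esr();

		process_and_clear_interrupts();
		if (global_esr)
			post_bpt_events_to_umd();
	}

	int main(void)
	{
		gr_isr_old();
		gr_isr_new();
		return 0;
	}

In the real patch below, the latching is done through a new hww_global_esr out-parameter threaded from the SM exception handler up through the TPC and GPC handlers to gk20a_gr_isr(), which posts the events at the end of the function.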
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 78
1 file changed, 46 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 979f69ba..271c384a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5869,7 +5869,8 @@ fail:
 }
 
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
-		bool *post_event, struct channel_gk20a *fault_ch)
+		bool *post_event, struct channel_gk20a *fault_ch,
+		u32 *hww_global_esr)
 {
 	int ret = 0;
 	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
@@ -5901,35 +5902,12 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		return -EFAULT;
 	}
 
-	if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) {
-		if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
-			struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];
-
-			gk20a_tsg_event_id_post_event(tsg,
-				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT);
-		} else {
-			gk20a_channel_event_id_post_event(fault_ch,
-				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT);
-		}
-	}
-
-	if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
-		if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
-			struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];
-
-			gk20a_tsg_event_id_post_event(tsg,
-				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE);
-		} else {
-			gk20a_channel_event_id_post_event(fault_ch,
-				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE);
-		}
-	}
-
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		"sm hww global %08x warp %08x", global_esr, warp_esr);
 
 	gr_gk20a_elpg_protected_call(g,
 		g->ops.gr.record_sm_error_state(g, gpc, tpc));
+	*hww_global_esr = global_esr;
 
 	if (g->ops.gr.pre_process_sm_exception) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
@@ -5946,7 +5924,7 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 
 	if (early_exit) {
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-			"returning early, skipping event posting");
+			"returning early");
 		return ret;
 	}
 
@@ -6009,7 +5987,8 @@ int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 }
 
 static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
-		bool *post_event, struct channel_gk20a *fault_ch)
+		bool *post_event, struct channel_gk20a *fault_ch,
+		u32 *hww_global_esr)
 {
 	int ret = 0;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
@@ -6026,7 +6005,8 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 			"GPC%d TPC%d: SM exception pending", gpc, tpc);
 		ret = g->ops.gr.handle_sm_exception(g, gpc, tpc,
-				post_event, fault_ch);
+				post_event, fault_ch,
+				hww_global_esr);
 	}
 
 	/* check if a tex exeption is pending */
@@ -6041,7 +6021,7 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 }
 
 static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
-		struct channel_gk20a *fault_ch)
+		struct channel_gk20a *fault_ch, u32 *hww_global_esr)
 {
 	int ret = 0;
 	u32 gpc_offset, tpc_offset, gpc, tpc;
@@ -6081,7 +6061,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 				gpc_offset + tpc_offset);
 
 		ret = gk20a_gr_handle_tpc_exception(g, gpc, tpc,
-				post_event, fault_ch);
+				post_event, fault_ch, hww_global_esr);
 
 		/* clear the hwws, also causes tpc and gpc
 		 * exceptions to be cleared */
@@ -6092,6 +6072,35 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 	return ret;
 }
 
+static int gk20a_gr_post_bpt_events(struct gk20a *g, struct channel_gk20a *ch,
+				u32 global_esr)
+{
+	if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) {
+		if (gk20a_is_channel_marked_as_tsg(ch)) {
+			struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
+
+			gk20a_tsg_event_id_post_event(tsg,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT);
+		} else {
+			gk20a_channel_event_id_post_event(ch,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT);
+		}
+	}
+	if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
+		if (gk20a_is_channel_marked_as_tsg(ch)) {
+			struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
+
+			gk20a_tsg_event_id_post_event(tsg,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE);
+		} else {
+			gk20a_channel_event_id_post_event(ch,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE);
+		}
+	}
+
+	return 0;
+}
+
 int gk20a_gr_isr(struct gk20a *g)
 {
 	struct device *dev = dev_from_gk20a(g);
@@ -6101,8 +6110,10 @@ int gk20a_gr_isr(struct gk20a *g)
 	int need_reset = 0;
 	u32 gr_intr = gk20a_readl(g, gr_intr_r());
 	struct channel_gk20a *ch = NULL;
+	struct channel_gk20a *fault_ch = NULL;
 	int tsgid = NVGPU_INVALID_TSG_ID;
 	u32 gr_engine_id;
+	u32 global_esr = 0;
 
 	gk20a_dbg_fn("");
 	gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -6235,7 +6246,6 @@ int gk20a_gr_isr(struct gk20a *g)
 
 	/* check if a gpc exception has occurred */
 	if (exception & gr_exception_gpc_m() && need_reset == 0) {
-		struct channel_gk20a *fault_ch;
 		bool post_event = false;
 
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");
@@ -6246,7 +6256,7 @@ int gk20a_gr_isr(struct gk20a *g)
 
 		/* check if any gpc has an exception */
 		need_reset |= gk20a_gr_handle_gpc_exception(g,
-				&post_event, fault_ch);
+				&post_event, fault_ch, &global_esr);
 
 		/* signal clients waiting on an event */
 		if (gk20a_gr_sm_debugger_attached(g) && post_event && fault_ch) {
@@ -6310,6 +6320,10 @@ int gk20a_gr_isr(struct gk20a *g)
 		gk20a_err(dev_from_gk20a(g),
 			"unhandled gr interrupt 0x%08x", gr_intr);
 
+	/* Posting of BPT events should be the last thing in this function */
+	if (global_esr && fault_ch)
+		gk20a_gr_post_bpt_events(g, fault_ch, global_esr);
+
 	if (ch)
 		gk20a_channel_put(ch);
 