summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Deepak Nibade <dnibade@nvidia.com> 2018-05-30 21:47:44 -0400
committer Tejal Kudav <tkudav@nvidia.com> 2018-06-14 09:44:06 -0400
commit 4252e00aa6f2a82ecf608f86d8057bac8cc97e15 (patch)
tree 0952089c4517916edfd962d82ab034e993f3f5c8 /drivers
parent 7aa928fa07066b1b9ac6ffb2edf0b473f10a2518 (diff)
gpu: nvgpu: fix crash due to accessing incorrect TSG pointer
In gk20a_gr_isr(), we handle various errors, including GPC/TPC errors. Then, if BPT errors are pending, we call gk20a_gr_post_bpt_events() at the end and pass the channel pointer to it. gk20a_gr_post_bpt_events() extracts the TSG pointer based on ch->tsgid. However, in some race conditions it is possible that we clear the error and trigger recovery, and as a result the channel is unbound from the TSG and closed by user space before gk20a_gr_post_bpt_events() is called. In that case the code above ends up with an incorrect TSG pointer and hence crashes as below: Unable to handle kernel paging request at virtual address ffffff8012000c08 ... [<ffffff8008081f84>] el1_da+0x24/0xb4 [<ffffff80086e72e0>] gk20a_tsg_get_event_data_from_id+0x30/0xb0 [<ffffff80086e7560>] gk20a_tsg_event_id_post_event+0x50/0xc8 [<ffffff800872922c>] gk20a_gr_isr+0x27c/0x12e0 To fix this, extract the TSG pointer before handling all the errors and pass this pointer to gk20a_gr_post_bpt_events(), which will post the events if they are enabled and if the TSG is still open. Bug 200404720 Change-Id: I4861c72e338a2cec96f31cb9488af665c5f2be39 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1735415 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com> Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b69618ae..d4b31c86 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5837,19 +5837,14 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
5837 return ret; 5837 return ret;
5838} 5838}
5839 5839
5840static int gk20a_gr_post_bpt_events(struct gk20a *g, struct channel_gk20a *ch, 5840static int gk20a_gr_post_bpt_events(struct gk20a *g, struct tsg_gk20a *tsg,
5841 u32 global_esr) 5841 u32 global_esr)
5842{ 5842{
5843 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()) { 5843 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
5844 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
5845
5846 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT); 5844 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT);
5847 }
5848 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()) {
5849 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
5850 5845
5846 if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f())
5851 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE); 5847 g->ops.fifo.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE);
5852 }
5853 5848
5854 return 0; 5849 return 0;
5855} 5850}
@@ -5864,6 +5859,7 @@ int gk20a_gr_isr(struct gk20a *g)
5864 struct channel_gk20a *ch = NULL; 5859 struct channel_gk20a *ch = NULL;
5865 struct channel_gk20a *fault_ch = NULL; 5860 struct channel_gk20a *fault_ch = NULL;
5866 int tsgid = NVGPU_INVALID_TSG_ID; 5861 int tsgid = NVGPU_INVALID_TSG_ID;
5862 struct tsg_gk20a *tsg = NULL;
5867 u32 gr_engine_id; 5863 u32 gr_engine_id;
5868 u32 global_esr = 0; 5864 u32 global_esr = 0;
5869 5865
@@ -5903,6 +5899,9 @@ int gk20a_gr_isr(struct gk20a *g)
5903 nvgpu_err(g, "ch id is INVALID 0xffffffff"); 5899 nvgpu_err(g, "ch id is INVALID 0xffffffff");
5904 } 5900 }
5905 5901
5902 if (ch && gk20a_is_channel_marked_as_tsg(ch))
5903 tsg = &g->fifo.tsg[ch->tsgid];
5904
5906 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, 5905 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
5907 "channel %d: addr 0x%08x, " 5906 "channel %d: addr 0x%08x, "
5908 "data 0x%08x 0x%08x," 5907 "data 0x%08x 0x%08x,"
@@ -6126,8 +6125,8 @@ int gk20a_gr_isr(struct gk20a *g)
6126 "unhandled gr interrupt 0x%08x", gr_intr); 6125 "unhandled gr interrupt 0x%08x", gr_intr);
6127 6126
6128 /* Posting of BPT events should be the last thing in this function */ 6127 /* Posting of BPT events should be the last thing in this function */
6129 if (global_esr && fault_ch) 6128 if (global_esr && tsg)
6130 gk20a_gr_post_bpt_events(g, fault_ch, global_esr); 6129 gk20a_gr_post_bpt_events(g, tsg, global_esr);
6131 6130
6132 if (ch) 6131 if (ch)
6133 gk20a_channel_put(ch); 6132 gk20a_channel_put(ch);