From 2d3e99067ea47d66a9490c405dcebc46e7fcdf03 Mon Sep 17 00:00:00 2001 From: Deepak Date: Wed, 24 Oct 2018 17:46:09 -0700 Subject: gpu: nvgpu: vgpu: Get channel reference - In the vGPU code path, the function vgpu_channel_abort_cleanup() does not obtain a channel reference before using the channel structure (channel_gk20a). - vgpu_channel_abort_cleanup() is called by vgpu_intr_thread(), which runs commands obtained from the interrupt queue from the RM server. - If gk20a_channel_release() runs before the guest receives the notification from the RM server to abort channel cleanup, the channel gets freed before vgpu_channel_abort_cleanup() runs. - However, because vgpu_channel_abort_cleanup() does not take an explicit reference to the channel, it ends up accessing structures (such as ch->g) which are set to NULL, and thus we end up in a crash. - This patch explicitly takes a reference to the channel at the start of vgpu_channel_abort_cleanup(), before the channel structure is used. - If gk20a_channel_release() runs before vgpu_channel_abort_cleanup() and ends up freeing the channel, we don't get a reference to the freed channel in vgpu_channel_abort_cleanup(), and thus we return from the function rather than continuing with the freed channel as was the case previously. 
Bug 200453473 JIRA EVLR-3411 Change-Id: I311043b2231336616b28246531cf8a0dc151b0cd Signed-off-by: Deepak Bhosale Reviewed-on: https://git-master.nvidia.com/r/1932028 (cherry picked from commit b91228e506c14f04945e35b5c996f711bb30a155) Reviewed-on: https://git-master.nvidia.com/r/1970807 Reviewed-by: Aparna Das Reviewed-by: Automatic_Commit_Validation_User Tested-by: Karl Ding GVS: Gerrit_Virtual_Submit Reviewed-by: Nirav Patel Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/vgpu/vgpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/vgpu') diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 7d7df9b3..266b801e 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -119,10 +119,16 @@ static void vgpu_handle_channel_event(struct gk20a *g, static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid) { - struct channel_gk20a *ch = &g->fifo.channel[chid]; + struct channel_gk20a *ch = gk20a_channel_get(&g->fifo.channel[chid]); + + if (ch == NULL) { + nvgpu_err(g, "invalid channel id %d", chid); + return; + } ch->has_timedout = true; g->ops.fifo.ch_abort_clean_up(ch); + gk20a_channel_put(ch); } static void vgpu_set_error_notifier(struct gk20a *g, -- cgit v1.2.2