summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2016-01-06 04:07:38 -0500
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-01-11 12:05:58 -0500
commit9713e3572a740216c6ecbc2257349be51c204a67 (patch)
treea2d5d82dbc94ca432295338695dafc5dc8cdfc16 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (diff)
gpu: nvgpu: disable ctxsw instead of all engines activity
In gk20a_channel_timeout_handler(), we currently disable all engine activity before checking for fence completion and before we identify timed out channel But disabling all engine activity could be overkill for this process. Also, as part of disabling engine activity we preempt the channel on engine. But it is possible that channel preemption times out since channel has already timed out And this can lead to races and deadlock Hence, instead of disabling all engine activity, just disable the context switch which should also do the same trick Bug 1716062 Change-Id: I596515ed670a2e134f7bcd9758488a4aa0bf16f7 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/929421 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c10
1 file changed, 5 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0421c0f6..f0a700ac 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1671,11 +1671,11 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
1671 ch->timeout.initialized = false; 1671 ch->timeout.initialized = false;
1672 mutex_unlock(&ch->timeout.lock); 1672 mutex_unlock(&ch->timeout.lock);
1673 1673
1674 if (gk20a_fifo_disable_all_engine_activity(g, true)) 1674 if (gr_gk20a_disable_ctxsw(g))
1675 goto fail_unlock; 1675 goto fail_unlock;
1676 1676
1677 if (gk20a_fence_is_expired(job->post_fence)) 1677 if (gk20a_fence_is_expired(job->post_fence))
1678 goto fail_enable_engine_activity; 1678 goto fail_enable_ctxsw;
1679 1679
1680 gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n", 1680 gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n",
1681 ch->hw_chid); 1681 ch->hw_chid);
@@ -1698,7 +1698,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
1698 /* If failing engine, trigger recovery */ 1698 /* If failing engine, trigger recovery */
1699 failing_ch = gk20a_channel_get(&g->fifo.channel[id]); 1699 failing_ch = gk20a_channel_get(&g->fifo.channel[id]);
1700 if (!failing_ch) 1700 if (!failing_ch)
1701 goto fail_enable_engine_activity; 1701 goto fail_enable_ctxsw;
1702 1702
1703 if (failing_ch->hw_chid != ch->hw_chid) 1703 if (failing_ch->hw_chid != ch->hw_chid)
1704 gk20a_channel_timeout_start(ch, job); 1704 gk20a_channel_timeout_start(ch, job);
@@ -1710,8 +1710,8 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
1710 gk20a_channel_put(failing_ch); 1710 gk20a_channel_put(failing_ch);
1711 } 1711 }
1712 1712
1713fail_enable_engine_activity: 1713fail_enable_ctxsw:
1714 gk20a_fifo_enable_all_engine_activity(g); 1714 gr_gk20a_enable_ctxsw(g);
1715fail_unlock: 1715fail_unlock:
1716 mutex_unlock(&g->ch_wdt_lock); 1716 mutex_unlock(&g->ch_wdt_lock);
1717 gk20a_channel_put(ch); 1717 gk20a_channel_put(ch);