diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-01-06 04:07:38 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-01-11 12:05:58 -0500 |
commit | 9713e3572a740216c6ecbc2257349be51c204a67 (patch) | |
tree | a2d5d82dbc94ca432295338695dafc5dc8cdfc16 /drivers | |
parent | 2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (diff) |
gpu: nvgpu: disable ctxsw instead of all engines activity
In gk20a_channel_timeout_handler(), we currently disable
all engine activity before checking for fence completion
and before we identify the timed-out channel.
But disabling all engine activity could be overkill for
this process.
Also, as part of disabling engine activity, we preempt
the channel on the engine.
But it is possible that the channel preemption itself times out,
since the channel has already timed out,
and this can lead to races and deadlock.
Hence, instead of disabling all engine activity, just
disable context switching, which should serve the
same purpose.
Bug 1716062
Change-Id: I596515ed670a2e134f7bcd9758488a4aa0bf16f7
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/929421
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0421c0f6..f0a700ac 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1671,11 +1671,11 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1671 | ch->timeout.initialized = false; | 1671 | ch->timeout.initialized = false; |
1672 | mutex_unlock(&ch->timeout.lock); | 1672 | mutex_unlock(&ch->timeout.lock); |
1673 | 1673 | ||
1674 | if (gk20a_fifo_disable_all_engine_activity(g, true)) | 1674 | if (gr_gk20a_disable_ctxsw(g)) |
1675 | goto fail_unlock; | 1675 | goto fail_unlock; |
1676 | 1676 | ||
1677 | if (gk20a_fence_is_expired(job->post_fence)) | 1677 | if (gk20a_fence_is_expired(job->post_fence)) |
1678 | goto fail_enable_engine_activity; | 1678 | goto fail_enable_ctxsw; |
1679 | 1679 | ||
1680 | gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n", | 1680 | gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n", |
1681 | ch->hw_chid); | 1681 | ch->hw_chid); |
@@ -1698,7 +1698,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1698 | /* If failing engine, trigger recovery */ | 1698 | /* If failing engine, trigger recovery */ |
1699 | failing_ch = gk20a_channel_get(&g->fifo.channel[id]); | 1699 | failing_ch = gk20a_channel_get(&g->fifo.channel[id]); |
1700 | if (!failing_ch) | 1700 | if (!failing_ch) |
1701 | goto fail_enable_engine_activity; | 1701 | goto fail_enable_ctxsw; |
1702 | 1702 | ||
1703 | if (failing_ch->hw_chid != ch->hw_chid) | 1703 | if (failing_ch->hw_chid != ch->hw_chid) |
1704 | gk20a_channel_timeout_start(ch, job); | 1704 | gk20a_channel_timeout_start(ch, job); |
@@ -1710,8 +1710,8 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1710 | gk20a_channel_put(failing_ch); | 1710 | gk20a_channel_put(failing_ch); |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | fail_enable_engine_activity: | 1713 | fail_enable_ctxsw: |
1714 | gk20a_fifo_enable_all_engine_activity(g); | 1714 | gr_gk20a_enable_ctxsw(g); |
1715 | fail_unlock: | 1715 | fail_unlock: |
1716 | mutex_unlock(&g->ch_wdt_lock); | 1716 | mutex_unlock(&g->ch_wdt_lock); |
1717 | gk20a_channel_put(ch); | 1717 | gk20a_channel_put(ch); |