From 5e440e63d67058834b17e4cd28d3e5c9e9b8c6e2 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Mon, 7 Jan 2019 15:39:09 +0200 Subject: gpu: nvgpu: abstract out timeout rewinding The channel timeout ends up in a strange state during timeout handling for a brief moment; it can become stopped and started again, and the timeout lock is released in the middle. Add a more explicit rewind function to reset the timeout to its starting point if it's active. The active check allows this to be used from gk20a_channel_timeout_restart_all_channels(), so that's also modified. Also replace the return statements with more readable control flow in gk20a_channel_timeout_handler(). Bug 200484795 Change-Id: Ia7d67242dfc149ace1f4f841a837e90b6c985308 Signed-off-by: Konsta Holtta Reviewed-on: https://git-master.nvidia.com/r/1989327 Reviewed-by: Deepak Nibade Reviewed-by: Terje Bergstrom (cherry picked from commit 8979a97af3dbb65904f9db8a22a6a168a3f41447 in dev-kernel) Reviewed-on: https://git-master.nvidia.com/r/2017922 Reviewed-by: Debarshi Dutta Tested-by: Debarshi Dutta Reviewed-by: Bibek Basu Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel.c | 56 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 0174e369..fc82748b 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -1473,6 +1473,25 @@ static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) nvgpu_raw_spinlock_release(&ch->timeout.lock); } +/** + * Reset the counter of a timeout that is in effect. + * + * If this channel has an active timeout, act as if something happened on the + * channel right now. + * + * Rewinding a stopped counter is irrelevant; this is a no-op for non-running + * timeouts. 
Stopped timeouts can only be started (which is technically a + * rewind too) or continued (where the stop is actually a pause). + */ +static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch) +{ + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + if (ch->timeout.running) { + __gk20a_channel_timeout_start(ch); + } + nvgpu_raw_spinlock_release(&ch->timeout.lock); +} + /** * Rewind the timeout on each non-dormant channel. * @@ -1491,11 +1510,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) if (ch != NULL) { if (!gk20a_channel_check_timedout(ch)) { - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - if (ch->timeout.running) { - __gk20a_channel_timeout_start(ch); - } - nvgpu_raw_spinlock_release(&ch->timeout.lock); + gk20a_channel_timeout_rewind(ch); } gk20a_channel_put(ch); } @@ -1538,28 +1553,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); if (new_gp_get != gp_get || new_pb_get != pb_get) { - /* Channel has advanced, rewind timer */ - gk20a_channel_timeout_stop(ch); - gk20a_channel_timeout_start(ch); - return; - } - - if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) { + /* Channel has advanced, timer keeps going but resets */ + gk20a_channel_timeout_rewind(ch); + } else if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) { /* Seems stuck but waiting to time out */ - return; - } + } else { + nvgpu_err(g, "Job on channel %d timed out", + ch->chid); - nvgpu_err(g, "Job on channel %d timed out", - ch->chid); + /* force reset calls gk20a_debug_dump but not this */ + if (ch->timeout.debug_dump) { + gk20a_gr_debug_dump(g); + } - /* force reset calls gk20a_debug_dump but not this */ - if (ch->timeout.debug_dump) { - gk20a_gr_debug_dump(g); + g->ops.fifo.force_reset_ch(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, + ch->timeout.debug_dump); } - - g->ops.fifo.force_reset_ch(ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, - ch->timeout.debug_dump); } /** -- 
cgit v1.2.2