diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2019-01-07 08:39:09 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-02-18 09:01:57 -0500 |
commit | 5e440e63d67058834b17e4cd28d3e5c9e9b8c6e2 (patch) | |
tree | 7d76875b719216c71a56cbd4a24a47f79603e94e /drivers/gpu/nvgpu/common | |
parent | b5d13e16aeec37281e92111e17be368b406f8511 (diff) |
gpu: nvgpu: abstract out timeout rewinding
The channel timeout ends up in a strange state during timeout handling
for a brief moment; it can become stopped and started again, and the
timeout lock is released in the middle. Add a more explicit rewind
function to reset the timeout to start if it's active. The active check
allows this to be used from gk20a_channel_timeout_restart_all_channels(), so
that function is also modified.
Also replace the return statements with more readable control flow in
gk20a_channel_timeout_handler().
Bug 200484795
Change-Id: Ia7d67242dfc149ace1f4f841a837e90b6c985308
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1989327
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
(cherry picked from commit 8979a97af3dbb65904f9db8a22a6a168a3f41447
in dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/2017922
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
Tested-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/fifo/channel.c | 56 |
1 files changed, 33 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 0174e369..fc82748b 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c | |||
@@ -1474,6 +1474,25 @@ static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) | |||
1474 | } | 1474 | } |
1475 | 1475 | ||
1476 | /** | 1476 | /** |
1477 | * Reset the counter of a timeout that is in effect. | ||
1478 | * | ||
1479 | * If this channel has an active timeout, act as if something happened on the | ||
1480 | * channel right now. | ||
1481 | * | ||
1482 | * Rewinding a stopped counter is irrelevant; this is a no-op for non-running | ||
1483 | * timeouts. Stopped timeouts can only be started (which is technically a | ||
1484 | * rewind too) or continued (where the stop is actually pause). | ||
1485 | */ | ||
1486 | static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch) | ||
1487 | { | ||
1488 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1489 | if (ch->timeout.running) { | ||
1490 | __gk20a_channel_timeout_start(ch); | ||
1491 | } | ||
1492 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1493 | } | ||
1494 | |||
1495 | /** | ||
1477 | * Rewind the timeout on each non-dormant channel. | 1496 | * Rewind the timeout on each non-dormant channel. |
1478 | * | 1497 | * |
1479 | * Reschedule the timeout of each active channel for which timeouts are running | 1498 | * Reschedule the timeout of each active channel for which timeouts are running |
@@ -1491,11 +1510,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) | |||
1491 | 1510 | ||
1492 | if (ch != NULL) { | 1511 | if (ch != NULL) { |
1493 | if (!gk20a_channel_check_timedout(ch)) { | 1512 | if (!gk20a_channel_check_timedout(ch)) { |
1494 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | 1513 | gk20a_channel_timeout_rewind(ch); |
1495 | if (ch->timeout.running) { | ||
1496 | __gk20a_channel_timeout_start(ch); | ||
1497 | } | ||
1498 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1499 | } | 1514 | } |
1500 | gk20a_channel_put(ch); | 1515 | gk20a_channel_put(ch); |
1501 | } | 1516 | } |
@@ -1538,28 +1553,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) | |||
1538 | new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); | 1553 | new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); |
1539 | 1554 | ||
1540 | if (new_gp_get != gp_get || new_pb_get != pb_get) { | 1555 | if (new_gp_get != gp_get || new_pb_get != pb_get) { |
1541 | /* Channel has advanced, rewind timer */ | 1556 | /* Channel has advanced, timer keeps going but resets */ |
1542 | gk20a_channel_timeout_stop(ch); | 1557 | gk20a_channel_timeout_rewind(ch); |
1543 | gk20a_channel_timeout_start(ch); | 1558 | } else if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) { |
1544 | return; | ||
1545 | } | ||
1546 | |||
1547 | if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) { | ||
1548 | /* Seems stuck but waiting to time out */ | 1559 | /* Seems stuck but waiting to time out */ |
1549 | return; | 1560 | } else { |
1550 | } | 1561 | nvgpu_err(g, "Job on channel %d timed out", |
1562 | ch->chid); | ||
1551 | 1563 | ||
1552 | nvgpu_err(g, "Job on channel %d timed out", | 1564 | /* force reset calls gk20a_debug_dump but not this */ |
1553 | ch->chid); | 1565 | if (ch->timeout.debug_dump) { |
1566 | gk20a_gr_debug_dump(g); | ||
1567 | } | ||
1554 | 1568 | ||
1555 | /* force reset calls gk20a_debug_dump but not this */ | 1569 | g->ops.fifo.force_reset_ch(ch, |
1556 | if (ch->timeout.debug_dump) { | 1570 | NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, |
1557 | gk20a_gr_debug_dump(g); | 1571 | ch->timeout.debug_dump); |
1558 | } | 1572 | } |
1559 | |||
1560 | g->ops.fifo.force_reset_ch(ch, | ||
1561 | NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, | ||
1562 | ch->timeout.debug_dump); | ||
1563 | } | 1573 | } |
1564 | 1574 | ||
1565 | /** | 1575 | /** |