diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-01-26 18:53:51 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-30 12:53:43 -0500 |
commit | cf8d9ccf8e8878431d93605e3e1b7c0211479cfe (patch) | |
tree | eb3fd7db26630734d403d27548ec93eb33c2bee9 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | ed220567798156aee74291044a6b2d9586cdee71 (diff) |
gpu: nvgpu: Base channel watchdog on gp_get
Instead of checking whether a job is complete, only check that the channel
is making progress by verifying that its gp_get is advancing.
This makes the watchdog more conservative. Previously a whole job had
x seconds to complete. Now the channel has x seconds for the host to
consume each push buffer segment.
Bug 1861838
Bug 200273419
Bug 200263100
Change-Id: I70adc1f50301bce8db7dac675771c251c0f11b70
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1294850
Reviewed-by: Automatic_Commit_Validation_User
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 24 |
1 files changed, 9 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 1ee9b59a..e272b130 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2078,8 +2078,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, | |||
2078 | nvgpu_kfree(g); | 2078 | nvgpu_kfree(g); |
2079 | } | 2079 | } |
2080 | 2080 | ||
2081 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch, | 2081 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch) |
2082 | struct channel_gk20a_job *job) | ||
2083 | { | 2082 | { |
2084 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | 2083 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); |
2085 | 2084 | ||
@@ -2096,7 +2095,7 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch, | |||
2096 | return; | 2095 | return; |
2097 | } | 2096 | } |
2098 | 2097 | ||
2099 | ch->timeout.job = job; | 2098 | ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch); |
2100 | ch->timeout.initialized = true; | 2099 | ch->timeout.initialized = true; |
2101 | raw_spin_unlock(&ch->timeout.lock); | 2100 | raw_spin_unlock(&ch->timeout.lock); |
2102 | 2101 | ||
@@ -2150,7 +2149,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) | |||
2150 | 2149 | ||
2151 | static void gk20a_channel_timeout_handler(struct work_struct *work) | 2150 | static void gk20a_channel_timeout_handler(struct work_struct *work) |
2152 | { | 2151 | { |
2153 | struct channel_gk20a_job *job; | 2152 | u32 gp_get; |
2154 | struct gk20a *g; | 2153 | struct gk20a *g; |
2155 | struct channel_gk20a *ch; | 2154 | struct channel_gk20a *ch; |
2156 | 2155 | ||
@@ -2170,23 +2169,18 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
2170 | /* Need global lock since multiple channels can timeout at a time */ | 2169 | /* Need global lock since multiple channels can timeout at a time */ |
2171 | mutex_lock(&g->ch_wdt_lock); | 2170 | mutex_lock(&g->ch_wdt_lock); |
2172 | 2171 | ||
2173 | gk20a_err(dev_from_gk20a(g), "Possible job timeout on ch=%d", | ||
2174 | ch->hw_chid); | ||
2175 | |||
2176 | /* Get timed out job and reset the timer */ | 2172 | /* Get timed out job and reset the timer */ |
2177 | raw_spin_lock(&ch->timeout.lock); | 2173 | raw_spin_lock(&ch->timeout.lock); |
2178 | job = ch->timeout.job; | 2174 | gp_get = ch->timeout.gp_get; |
2179 | ch->timeout.initialized = false; | 2175 | ch->timeout.initialized = false; |
2180 | raw_spin_unlock(&ch->timeout.lock); | 2176 | raw_spin_unlock(&ch->timeout.lock); |
2181 | 2177 | ||
2182 | if (gk20a_fence_is_expired(job->post_fence)) { | 2178 | if (gk20a_userd_gp_get(ch->g, ch) != gp_get) { |
2183 | gk20a_err(dev_from_gk20a(g), | 2179 | gk20a_channel_timeout_start(ch); |
2184 | "Timed out fence is expired on c=%d!", | ||
2185 | ch->hw_chid); | ||
2186 | goto fail_unlock; | 2180 | goto fail_unlock; |
2187 | } | 2181 | } |
2188 | 2182 | ||
2189 | gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out", | 2183 | gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out", |
2190 | ch->hw_chid); | 2184 | ch->hw_chid); |
2191 | 2185 | ||
2192 | gk20a_debug_dump(g->dev); | 2186 | gk20a_debug_dump(g->dev); |
@@ -2275,7 +2269,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
2275 | job->num_mapped_buffers = num_mapped_buffers; | 2269 | job->num_mapped_buffers = num_mapped_buffers; |
2276 | job->mapped_buffers = mapped_buffers; | 2270 | job->mapped_buffers = mapped_buffers; |
2277 | 2271 | ||
2278 | gk20a_channel_timeout_start(c, job); | 2272 | gk20a_channel_timeout_start(c); |
2279 | 2273 | ||
2280 | if (!pre_alloc_enabled) | 2274 | if (!pre_alloc_enabled) |
2281 | channel_gk20a_joblist_lock(c); | 2275 | channel_gk20a_joblist_lock(c); |
@@ -2357,7 +2351,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2357 | 2351 | ||
2358 | completed = gk20a_fence_is_expired(job->post_fence); | 2352 | completed = gk20a_fence_is_expired(job->post_fence); |
2359 | if (!completed) { | 2353 | if (!completed) { |
2360 | gk20a_channel_timeout_start(c, job); | 2354 | gk20a_channel_timeout_start(c); |
2361 | break; | 2355 | break; |
2362 | } | 2356 | } |
2363 | 2357 | ||