author	Terje Bergstrom <tbergstrom@nvidia.com>	2017-01-26 18:53:51 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-30 12:53:43 -0500
commit	cf8d9ccf8e8878431d93605e3e1b7c0211479cfe (patch)
tree	eb3fd7db26630734d403d27548ec93eb33c2bee9 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent	ed220567798156aee74291044a6b2d9586cdee71 (diff)
gpu: nvgpu: Base channel watchdog on gp_get
Instead of checking whether a job is complete, only check that the
channel is making progress by checking that its gp_get is advancing.
This makes the watchdog more conservative: previously a whole job had
x seconds to complete; now the channel has x seconds to get the host
to consume each push buffer segment.

Bug 1861838
Bug 200273419
Bug 200263100

Change-Id: I70adc1f50301bce8db7dac675771c251c0f11b70
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1294850
Reviewed-by: Automatic_Commit_Validation_User
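In short, the handler no longer asks whether a particular job's fence has
expired; it asks whether gp_get has advanced since the timer was armed, and
re-arms instead of firing when it has. Below is a minimal userspace sketch of
that pattern; struct channel, read_gp_get() and main() are hypothetical
stand-ins for the driver's channel state and gk20a_userd_gp_get(), and the
real code additionally serializes the sampled state under ch->timeout.lock.

/*
 * Sketch of the progress-based watchdog pattern this commit introduces.
 * All names here are illustrative, not the nvgpu API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct channel {
	uint32_t gp_get;          /* host's pushbuffer consume pointer */
	struct {
		bool initialized; /* watchdog armed? */
		uint32_t gp_get;  /* gp_get sampled when the timer started */
	} timeout;
};

/* Hypothetical stand-in for gk20a_userd_gp_get(): read how far the
 * host has progressed through the channel's pushbuffer. */
static uint32_t read_gp_get(const struct channel *ch)
{
	return ch->gp_get;
}

/* Arm the watchdog: record where the host currently is. */
static void timeout_start(struct channel *ch)
{
	ch->timeout.gp_get = read_gp_get(ch);
	ch->timeout.initialized = true;
}

/* Called when the timer fires. Returns true only if gp_get did not
 * advance during the whole period; if it did, the channel made
 * progress, so just re-arm the timer. */
static bool timeout_expired(struct channel *ch)
{
	uint32_t armed_at = ch->timeout.gp_get;

	ch->timeout.initialized = false;
	if (read_gp_get(ch) != armed_at) {
		timeout_start(ch); /* progress seen: restart the clock */
		return false;
	}
	return true; /* no progress for a full period: real timeout */
}

int main(void)
{
	struct channel ch = { .gp_get = 10 };

	timeout_start(&ch);
	ch.gp_get = 12; /* host consumed two entries: progress */
	printf("hung: %d\n", timeout_expired(&ch)); /* hung: 0, re-armed */

	/* gp_get unchanged across the next period: report a timeout */
	printf("hung: %d\n", timeout_expired(&ch)); /* hung: 1 */
	return 0;
}

The design trade-off: sampling gp_get makes the watchdog indifferent to
long-running but healthy jobs, since the timer keeps restarting as long as
the host keeps consuming pushbuffer entries; only a truly stalled channel
trips the recovery path.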
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	24
1 file changed, 9 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 1ee9b59a..e272b130 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2078,8 +2078,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	nvgpu_kfree(g);
 }
 
-static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
-		struct channel_gk20a_job *job)
+static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
 
@@ -2096,7 +2095,7 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
 		return;
 	}
 
-	ch->timeout.job = job;
+	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
 	ch->timeout.initialized = true;
 	raw_spin_unlock(&ch->timeout.lock);
 
@@ -2150,7 +2149,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 
 static void gk20a_channel_timeout_handler(struct work_struct *work)
 {
-	struct channel_gk20a_job *job;
+	u32 gp_get;
 	struct gk20a *g;
 	struct channel_gk20a *ch;
 
@@ -2170,23 +2169,18 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
 	/* Need global lock since multiple channels can timeout at a time */
 	mutex_lock(&g->ch_wdt_lock);
 
-	gk20a_err(dev_from_gk20a(g), "Possible job timeout on ch=%d",
-		  ch->hw_chid);
-
 	/* Get timed out job and reset the timer */
 	raw_spin_lock(&ch->timeout.lock);
-	job = ch->timeout.job;
+	gp_get = ch->timeout.gp_get;
 	ch->timeout.initialized = false;
 	raw_spin_unlock(&ch->timeout.lock);
 
-	if (gk20a_fence_is_expired(job->post_fence)) {
-		gk20a_err(dev_from_gk20a(g),
-			  "Timed out fence is expired on c=%d!",
-			  ch->hw_chid);
+	if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
+		gk20a_channel_timeout_start(ch);
 		goto fail_unlock;
 	}
 
-	gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out",
+	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
 		  ch->hw_chid);
 
 	gk20a_debug_dump(g->dev);
@@ -2275,7 +2269,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	job->num_mapped_buffers = num_mapped_buffers;
 	job->mapped_buffers = mapped_buffers;
 
-	gk20a_channel_timeout_start(c, job);
+	gk20a_channel_timeout_start(c);
 
 	if (!pre_alloc_enabled)
 		channel_gk20a_joblist_lock(c);
@@ -2357,7 +2351,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 	completed = gk20a_fence_is_expired(job->post_fence);
 	if (!completed) {
-		gk20a_channel_timeout_start(c, job);
+		gk20a_channel_timeout_start(c);
 		break;
 	}
 