diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-01-26 18:53:51 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-30 12:53:43 -0500 |
commit | cf8d9ccf8e8878431d93605e3e1b7c0211479cfe (patch) | |
tree | eb3fd7db26630734d403d27548ec93eb33c2bee9 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | ed220567798156aee74291044a6b2d9586cdee71 (diff) |
gpu: nvgpu: Base channel watchdog on gp_get
Instead of checking whether a job is complete, only check that the channel
is making progress by verifying that its gp_get is advancing.
This makes the watchdog more conservative. Previously a whole job had
x seconds to complete. Now the channel has x seconds for the host to
consume each push buffer segment.
Bug 1861838
Bug 200273419
Bug 200263100
Change-Id: I70adc1f50301bce8db7dac675771c251c0f11b70
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1294850
Reviewed-by: Automatic_Commit_Validation_User
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 24 |
1 files changed, 9 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 1ee9b59a..e272b130 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2078,8 +2078,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, | |||
2078 | nvgpu_kfree(g); | 2078 | nvgpu_kfree(g); |
2079 | } | 2079 | } |
2080 | 2080 | ||
2081 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch, | 2081 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch) |
2082 | struct channel_gk20a_job *job) | ||
2083 | { | 2082 | { |
2084 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | 2083 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); |
2085 | 2084 | ||
@@ -2096,7 +2095,7 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch, | |||
2096 | return; | 2095 | return; |
2097 | } | 2096 | } |
2098 | 2097 | ||
2099 | ch->timeout.job = job; | 2098 | ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch); |
2100 | ch->timeout.initialized = true; | 2099 | ch->timeout.initialized = true; |
2101 | raw_spin_unlock(&ch->timeout.lock); | 2100 | raw_spin_unlock(&ch->timeout.lock); |
2102 | 2101 | ||
@@ -2150,7 +2149,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) | |||
2150 | 2149 | ||
2151 | static void gk20a_channel_timeout_handler(struct work_struct *work) | 2150 | static void gk20a_channel_timeout_handler(struct work_struct *work) |
2152 | { | 2151 | { |
2153 | struct channel_gk20a_job *job; | 2152 | u32 gp_get; |
2154 | struct gk20a *g; | 2153 | struct gk20a *g; |
2155 | struct channel_gk20a *ch; | 2154 | struct channel_gk20a *ch; |
2156 | 2155 | ||
@@ -2170,23 +2169,18 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
2170 | /* Need global lock since multiple channels can timeout at a time */ | 2169 | /* Need global lock since multiple channels can timeout at a time */ |
2171 | mutex_lock(&g->ch_wdt_lock); | 2170 | mutex_lock(&g->ch_wdt_lock); |
2172 | 2171 | ||
2173 | gk20a_err(dev_from_gk20a(g), "Possible job timeout on ch=%d", | ||
2174 | ch->hw_chid); | ||
2175 | |||
2176 | /* Get timed out job and reset the timer */ | 2172 | /* Get timed out job and reset the timer */ |
2177 | raw_spin_lock(&ch->timeout.lock); | 2173 | raw_spin_lock(&ch->timeout.lock); |
2178 | job = ch->timeout.job; | 2174 | gp_get = ch->timeout.gp_get; |
2179 | ch->timeout.initialized = false; | 2175 | ch->timeout.initialized = false; |
2180 | raw_spin_unlock(&ch->timeout.lock); | 2176 | raw_spin_unlock(&ch->timeout.lock); |
2181 | 2177 | ||
2182 | if (gk20a_fence_is_expired(job->post_fence)) { | 2178 | if (gk20a_userd_gp_get(ch->g, ch) != gp_get) { |
2183 | gk20a_err(dev_from_gk20a(g), | 2179 | gk20a_channel_timeout_start(ch); |
2184 | "Timed out fence is expired on c=%d!", | ||
2185 | ch->hw_chid); | ||
2186 | goto fail_unlock; | 2180 | goto fail_unlock; |
2187 | } | 2181 | } |
2188 | 2182 | ||
2189 | gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out", | 2183 | gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out", |
2190 | ch->hw_chid); | 2184 | ch->hw_chid); |
2191 | 2185 | ||
2192 | gk20a_debug_dump(g->dev); | 2186 | gk20a_debug_dump(g->dev); |
@@ -2275,7 +2269,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
2275 | job->num_mapped_buffers = num_mapped_buffers; | 2269 | job->num_mapped_buffers = num_mapped_buffers; |
2276 | job->mapped_buffers = mapped_buffers; | 2270 | job->mapped_buffers = mapped_buffers; |
2277 | 2271 | ||
2278 | gk20a_channel_timeout_start(c, job); | 2272 | gk20a_channel_timeout_start(c); |
2279 | 2273 | ||
2280 | if (!pre_alloc_enabled) | 2274 | if (!pre_alloc_enabled) |
2281 | channel_gk20a_joblist_lock(c); | 2275 | channel_gk20a_joblist_lock(c); |
@@ -2357,7 +2351,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2357 | 2351 | ||
2358 | completed = gk20a_fence_is_expired(job->post_fence); | 2352 | completed = gk20a_fence_is_expired(job->post_fence); |
2359 | if (!completed) { | 2353 | if (!completed) { |
2360 | gk20a_channel_timeout_start(c, job); | 2354 | gk20a_channel_timeout_start(c); |
2361 | break; | 2355 | break; |
2362 | } | 2356 | } |
2363 | 2357 | ||