author     Konsta Holtta <kholtta@nvidia.com>                   2018-03-22 09:47:43 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-03-28 15:13:07 -0400
commit     d20a501dcbf2e29d5d319670485f273afa3aefad (patch)
tree       22d3443589f0cf3186ef2c4096d0f82569b5401b /drivers
parent     9b70ad24934988141c585d9fd85c59f5c9d58a38 (diff)
gpu: nvgpu: simplify job semaphore release in abort
Instead of looping over all the jobs and releasing their semaphores
separately, do just one semaphore release. All the jobs use the same
sema index, and its final, maximum value is known. Also move this
resetting into ch->sync->set_min_eq_max() to be consistent with
syncpoints.

Change-Id: I03601aae67db0a65750c8df6b43387c042d383bd
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680362
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
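The idea behind the change: the channel's single hardware semaphore tracks in next_value the maximum threshold handed out to any queued job, so an abort can fast-forward the memory-backed value once instead of walking the job list. The standalone C model below is only an illustration of that idea under simplified assumptions; struct fake_hw_sema and sema_reset_to_max are made-up names, not the nvgpu API shown in the hunks that follow.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for nvgpu_semaphore_int (illustrative only): a memory-backed
 * value plus the maximum value any queued job will release it to. */
struct fake_hw_sema {
	uint32_t value;      /* what would live in the semaphore's memory */
	uint32_t next_value; /* max threshold handed out to jobs so far */
};

/* Fast-forward the semaphore to its tracked max, mirroring the shape of the
 * reworked nvgpu_semaphore_reset(): return true only if the value actually
 * moved and waiters therefore need a wakeup. */
static bool sema_reset_to_max(struct fake_hw_sema *s)
{
	uint32_t threshold = s->next_value;

	if (s->value == threshold)
		return false;   /* already at max: nothing to do */

	s->value = threshold;   /* one write covers every pending job */
	return true;
}

int main(void)
{
	struct fake_hw_sema s = { .value = 3, .next_value = 7 };

	if (sema_reset_to_max(&s))
		printf("reset to %u, wake waiters\n", (unsigned)s.value);

	if (!sema_reset_to_max(&s))
		printf("already at max, no second wakeup\n");

	return 0;
}

In the real driver the "wake waiters" step is the nvgpu_cond_broadcast_interruptible() call, now issued from set_min_eq_max() only when the reset reports an actual update.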
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       44
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  13
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/semaphore.h   28
3 files changed, 30 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5befe086..8b144864 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -185,10 +185,6 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
-	struct channel_gk20a_job *job, *n;
-	bool released_job_semaphore = false;
-	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
-
 	/* synchronize with actual job cleanup */
 	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
@@ -200,48 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 		ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
-	/* release all job semaphores (applies only to jobs that use
-	   semaphore synchronization) */
-	channel_gk20a_joblist_lock(ch);
-	if (pre_alloc_enabled) {
-		int tmp_get = ch->joblist.pre_alloc.get;
-		int put = ch->joblist.pre_alloc.put;
-
-		/*
-		 * ensure put is read before any subsequent reads.
-		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
-		 */
-		nvgpu_smp_rmb();
-
-		while (tmp_get != put) {
-			job = &ch->joblist.pre_alloc.jobs[tmp_get];
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
-		}
-	} else {
-		nvgpu_list_for_each_entry_safe(job, n,
-				&ch->joblist.dynamic.jobs,
-				channel_gk20a_job, list) {
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-		}
-	}
-	channel_gk20a_joblist_unlock(ch);
-
 	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
 
-	if (released_job_semaphore)
-		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
-
 	/*
 	 * When closing the channel, this scheduled update holds one ref which
 	 * is waited for before advancing with freeing.
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d87b8661..bf467210 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -646,7 +646,18 @@ static int gk20a_channel_semaphore_incr_user(
 
 static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-	/* Nothing to do. */
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	bool updated;
+
+	if (!c->hw_sema)
+		return;
+
+	updated = nvgpu_semaphore_reset(c->hw_sema);
+
+	if (updated)
+		nvgpu_cond_broadcast_interruptible(&c->semaphore_wq);
 }
 
 static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 373c5745..9ab6cc67 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -276,28 +276,36 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * Fast-forward the hw sema to the threshold represented by sema_thresh.
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
  */
-static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
-		struct nvgpu_semaphore_int *hw_sema)
+static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
 {
-	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
-
-	current_val = nvgpu_semaphore_read(sema_thresh);
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
 
 	/*
 	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP.
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
 	 */
-	if (__nvgpu_semaphore_value_released(threshold, current_val))
-		return;
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
 
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 			hw_sema->location.offset, threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
 			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
 }
 
 /*