author     Konsta Holtta <kholtta@nvidia.com>                   2018-03-22 09:47:43 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-03-28 15:13:07 -0400
commit     d20a501dcbf2e29d5d319670485f273afa3aefad (patch)
tree       22d3443589f0cf3186ef2c4096d0f82569b5401b /drivers
parent     9b70ad24934988141c585d9fd85c59f5c9d58a38 (diff)
gpu: nvgpu: simplify job semaphore release in abort
Instead of looping over all the jobs and releasing their semaphores
separately, do just one semaphore release. All the jobs use the same
sema index, and its final, maximum value is known. Also move this
resetting into ch->sync->set_min_eq_max() to be consistent with
syncpoints.

Change-Id: I03601aae67db0a65750c8df6b43387c042d383bd
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680362
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
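The idea behind the change: the channel's single hardware semaphore tracks in next_value the maximum threshold handed out to any queued job, so an abort can fast-forward the memory-backed value once instead of walking the job list. The standalone C model below is only an illustration of that idea under simplified assumptions; struct fake_hw_sema and sema_reset_to_max are made-up names, not the nvgpu API shown in the hunks that follow.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for nvgpu_semaphore_int (illustrative only): a memory-backed
 * value plus the maximum value any queued job will release it to. */
struct fake_hw_sema {
	uint32_t value;      /* what would live in the semaphore's memory */
	uint32_t next_value; /* max threshold handed out to jobs so far */
};

/* Fast-forward the semaphore to its tracked max, mirroring the shape of the
 * reworked nvgpu_semaphore_reset(): return true only if the value actually
 * moved and waiters therefore need a wakeup. */
static bool sema_reset_to_max(struct fake_hw_sema *s)
{
	uint32_t threshold = s->next_value;

	if (s->value == threshold)
		return false;   /* already at max: nothing to do */

	s->value = threshold;   /* one write covers every pending job */
	return true;
}

int main(void)
{
	struct fake_hw_sema s = { .value = 3, .next_value = 7 };

	if (sema_reset_to_max(&s))
		printf("reset to %u, wake waiters\n", (unsigned)s.value);

	if (!sema_reset_to_max(&s))
		printf("already at max, no second wakeup\n");

	return 0;
}

In the real driver the "wake waiters" step is the nvgpu_cond_broadcast_interruptible() call, now issued from set_min_eq_max() only when the reset reports an actual update.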
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       44
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  13
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/semaphore.h   28
3 files changed, 30 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5befe086..8b144864 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -185,10 +185,6 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
-	struct channel_gk20a_job *job, *n;
-	bool released_job_semaphore = false;
-	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
-
 	/* synchronize with actual job cleanup */
 	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
@@ -200,48 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 		ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
-	/* release all job semaphores (applies only to jobs that use
-	   semaphore synchronization) */
-	channel_gk20a_joblist_lock(ch);
-	if (pre_alloc_enabled) {
-		int tmp_get = ch->joblist.pre_alloc.get;
-		int put = ch->joblist.pre_alloc.put;
-
-		/*
-		 * ensure put is read before any subsequent reads.
-		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
-		 */
-		nvgpu_smp_rmb();
-
-		while (tmp_get != put) {
-			job = &ch->joblist.pre_alloc.jobs[tmp_get];
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
-		}
-	} else {
-		nvgpu_list_for_each_entry_safe(job, n,
-				&ch->joblist.dynamic.jobs,
-				channel_gk20a_job, list) {
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-		}
-	}
-	channel_gk20a_joblist_unlock(ch);
-
 	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
 
-	if (released_job_semaphore)
-		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
-
 	/*
 	 * When closing the channel, this scheduled update holds one ref which
 	 * is waited for before advancing with freeing.
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d87b8661..bf467210 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -646,7 +646,18 @@ static int gk20a_channel_semaphore_incr_user(
 
 static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-	/* Nothing to do. */
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	bool updated;
+
+	if (!c->hw_sema)
+		return;
+
+	updated = nvgpu_semaphore_reset(c->hw_sema);
+
+	if (updated)
+		nvgpu_cond_broadcast_interruptible(&c->semaphore_wq);
 }
 
 static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 373c5745..9ab6cc67 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -276,28 +276,36 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * Fast-forward the hw sema to the threshold represented by sema_thresh.
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
  */
-static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
-		struct nvgpu_semaphore_int *hw_sema)
+static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
 {
-	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
-
-	current_val = nvgpu_semaphore_read(sema_thresh);
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
 
 	/*
 	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP.
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
 	 */
-	if (__nvgpu_semaphore_value_released(threshold, current_val))
-		return;
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
 
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 			hw_sema->location.offset, threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
 			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
 }
 
 /*