diff options (stat)

 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 44 ----------------------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 13 ++++++-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h  | 28 ++++++++------
 3 files changed, 30 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5befe086..8b144864 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -185,10 +185,6 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
-	struct channel_gk20a_job *job, *n;
-	bool released_job_semaphore = false;
-	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
-
 	/* synchronize with actual job cleanup */
 	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
@@ -200,48 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
-	/* release all job semaphores (applies only to jobs that use
-	   semaphore synchronization) */
-	channel_gk20a_joblist_lock(ch);
-	if (pre_alloc_enabled) {
-		int tmp_get = ch->joblist.pre_alloc.get;
-		int put = ch->joblist.pre_alloc.put;
-
-		/*
-		 * ensure put is read before any subsequent reads.
-		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
-		 */
-		nvgpu_smp_rmb();
-
-		while (tmp_get != put) {
-			job = &ch->joblist.pre_alloc.jobs[tmp_get];
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
-		}
-	} else {
-		nvgpu_list_for_each_entry_safe(job, n,
-				&ch->joblist.dynamic.jobs,
-				channel_gk20a_job, list) {
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-		}
-	}
-	channel_gk20a_joblist_unlock(ch);
-
 	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
 
-	if (released_job_semaphore)
-		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
-
 	/*
 	 * When closing the channel, this scheduled update holds one ref which
 	 * is waited for before advancing with freeing.
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index d87b8661..bf467210 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -646,7 +646,18 @@ static int gk20a_channel_semaphore_incr_user(
 
 static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-	/* Nothing to do. */
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	bool updated;
+
+	if (!c->hw_sema)
+		return;
+
+	updated = nvgpu_semaphore_reset(c->hw_sema);
+
+	if (updated)
+		nvgpu_cond_broadcast_interruptible(&c->semaphore_wq);
 }
 
 static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 373c5745..9ab6cc67 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -276,28 +276,36 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * Fast-forward the hw sema to the threshold represented by sema_thresh.
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
  */
-static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
-		struct nvgpu_semaphore_int *hw_sema)
+static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
 {
-	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
-
-	current_val = nvgpu_semaphore_read(sema_thresh);
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
 
 	/*
 	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP.
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
 	 */
-	if (__nvgpu_semaphore_value_released(threshold, current_val))
-		return;
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
 
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 		hw_sema->location.offset, threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
 		hw_sema->ch->chid, current_val, threshold);
+
+	return true;
 }
 
 /*