Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 175 +++++++++++---------
 1 file changed, 82 insertions(+), 93 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 4b1be8b9..c6b55bf8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -502,10 +502,10 @@ static void gk20a_channel_semaphore_launcher(
 
 static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
		  struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
-		  int cmd_size, bool acquire, bool wfi)
+		  u32 offset, bool acquire, bool wfi)
 {
	int ch = c->chid;
-	u32 ob, off = cmd->off;
+	u32 ob, off = cmd->off + offset;
	u64 va;
 
	ob = off;
@@ -588,108 +588,79 @@ static int gk20a_channel_semaphore_wait_syncpt(
 }
 
 #ifdef CONFIG_SYNC
-/*
- * Attempt a fast path for waiting on a sync_fence. Basically if the passed
- * sync_fence is backed by a nvgpu_semaphore then there's no reason to go
- * through the rigmarole of setting up a separate semaphore which waits on an
- * interrupt from the GPU and then triggers a worker thread to execute a SW
- * based semaphore release. Instead just have the GPU wait on the same semaphore
- * that is going to be incremented by the GPU.
- *
- * This function returns 2 possible values: -ENODEV or 0 on success. In the case
- * of -ENODEV the fastpath cannot be taken due to the fence not being backed by
- * a GPU semaphore.
- */
-static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
-					 struct sync_fence *fence,
-					 struct priv_cmd_entry *wait_cmd,
-					 struct nvgpu_semaphore **fp_sema)
+static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd)
 {
-	struct nvgpu_semaphore *sema;
+	struct sync_fence *sync_fence;
	int err;
+	const int wait_cmd_size = 8;
+	int num_wait_cmds;
+	int i;
 
-	if (!gk20a_is_sema_backed_sync_fence(fence))
-		return -ENODEV;
-
-	sema = gk20a_sync_fence_get_sema(fence);
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
 
-	/*
-	 * If there's no underlying sema then that means the underlying sema has
-	 * already signaled.
-	 */
-	if (!sema) {
-		*fp_sema = NULL;
-		return 0;
-	}
+	num_wait_cmds = sync_fence->num_fences;
+	if (num_wait_cmds == 0) {
+		err = 0;
+		goto put_fence;
+	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
-	if (err)
-		return err;
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			wait_cmd_size * num_wait_cmds,
+			wait_cmd);
+	if (err) {
+		nvgpu_err(c->g, "not enough priv cmd buffer space");
+		goto put_fence;
+	}
 
-	nvgpu_semaphore_get(sema);
-	BUG_ON(!sema->incremented);
-	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		struct fence *f = sync_fence->cbs[i].sync_pt;
+		struct sync_pt *pt = sync_pt_from_fence(f);
+		struct nvgpu_semaphore *sema;
 
-	/*
-	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
-	 * fence with the underlying semaphore.
-	 */
-	*fp_sema = sema;
+		sema = gk20a_sync_pt_sema(pt);
+		if (!sema) {
+			/* expired */
+			nvgpu_memset(c->g, wait_cmd->mem,
+				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		} else {
+			WARN_ON(!sema->incremented);
+			add_sema_cmd(c->g, c, sema, wait_cmd,
+					i * wait_cmd_size, true, false);
+			nvgpu_semaphore_put(sema);
+		}
+	}
 
-	return 0;
+put_fence:
+	sync_fence_put(sync_fence);
+	return err;
 }
-#endif
 
-static int gk20a_channel_semaphore_wait_fd(
-		struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence)
+static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd,
+		struct gk20a_fence *fence_out,
+		struct sync_timeline *timeline)
 {
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	struct channel_gk20a *c = sema->c;
-#ifdef CONFIG_SYNC
-	struct nvgpu_semaphore *fp_sema;
+	const int wait_cmd_size = 8;
	struct sync_fence *sync_fence;
-	struct priv_cmd_entry *wait_cmd = entry;
	struct wait_fence_work *w = NULL;
-	int err, ret, status;
+	int err, status;
 
-	sync_fence = gk20a_sync_fence_fdget(fd);
+	sync_fence = sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;
 
-	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
-	if (ret == 0) {
-		if (fp_sema) {
-			err = gk20a_fence_from_semaphore(c->g, fence,
-					sema->timeline,
-					fp_sema,
-					&c->semaphore_wq,
-					false);
-			if (err) {
-				nvgpu_semaphore_put(fp_sema);
-				goto clean_up_priv_cmd;
-			}
-		} else
-			/*
-			 * Init an empty fence. It will instantly return
-			 * from gk20a_fence_wait().
-			 */
-			gk20a_init_fence(fence, NULL, NULL);
-
-		sync_fence_put(sync_fence);
-		goto skip_slow_path;
-	}
-
	/* If the fence has signaled there is no reason to wait on it. */
	status = atomic_read(&sync_fence->status);
	if (status == 0) {
		sync_fence_put(sync_fence);
-		goto skip_slow_path;
+		return 0;
	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
@@ -718,34 +689,34 @@ static int gk20a_channel_semaphore_wait_fd(
	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
	/* GPU unblocked when the semaphore value increments. */
-	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);
 
	/*
	 * We need to create the fence before adding the waiter to ensure
	 * that we properly clean up in the event the sync_fence has
	 * already signaled
	 */
-	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-					 &c->semaphore_wq, false);
+	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
+			w->sema, &c->semaphore_wq, false);
	if (err)
		goto clean_up_sema;
 
-	ret = sync_fence_wait_async(sync_fence, &w->waiter);
+	err = sync_fence_wait_async(sync_fence, &w->waiter);
	gk20a_add_pending_sema_wait(c->g, w);
 
	/*
-	 * If the sync_fence has already signaled then the above async_wait
-	 * will never trigger. This causes the semaphore release op to never
-	 * happen which, in turn, hangs the GPU. That's bad. So let's just
-	 * do the nvgpu_semaphore_release() right now.
+	 * If the sync_fence has already signaled then the above wait_async
+	 * will not get scheduled; the fence completed just after doing the
+	 * status check above before allocs and waiter init, and won the race.
+	 * This causes the waiter to be skipped, so let's release the semaphore
+	 * here and put the refs taken for the worker.
	 */
-	if (ret == 1) {
+	if (err == 1) {
		sync_fence_put(sync_fence);
		nvgpu_semaphore_release(w->sema, c->hw_sema);
		nvgpu_semaphore_put(w->sema);
	}
 
-skip_slow_path:
	return 0;
 
 clean_up_sema:
@@ -758,10 +729,28 @@ clean_up_sema:
 clean_up_worker:
	nvgpu_kfree(c->g, w);
 clean_up_priv_cmd:
-	gk20a_free_priv_cmdbuf(c, entry);
+	gk20a_free_priv_cmdbuf(c, wait_cmd);
 clean_up_sync_fence:
	sync_fence_put(sync_fence);
	return err;
+}
+#endif
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	int err;
+
+	err = semaphore_wait_fd_native(c, fd, entry);
+	if (err)
+		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
+	return err;
 #else
	nvgpu_err(c->g,
		"trying to use sync fds with CONFIG_SYNC disabled");
@@ -798,7 +787,7 @@ static int __gk20a_channel_semaphore_incr(
	}
 
	/* Release the completion semaphore. */
-	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);
 
	err = gk20a_fence_from_semaphore(c->g, fence,
			sp->timeline, semaphore,
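Note on the native path above: instead of one combined wait, the new semaphore_wait_fd_native() reserves num_wait_cmds fixed-size slots of wait_cmd_size (8) words in a single priv cmdbuf, so the acquire command for fence i always starts at wait_cmd->off + i * wait_cmd_size, and an already-expired fence simply leaves its slot zero-filled, which the driver relies on being harmless padding. The following minimal, self-contained user-space sketch shows only that slot layout; fake_fence, emit_acquire and the emitted words are hypothetical stand-ins, not the nvgpu command format.

/*
 * Sketch of the fixed-slot layout used by semaphore_wait_fd_native().
 * All names and word encodings here are made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WAIT_CMD_SIZE 8	/* words per semaphore-acquire slot */

struct fake_fence {
	int expired;		/* already signaled: leave a no-op slot */
	uint64_t sema_gpu_va;	/* GPU address of the semaphore */
	uint32_t payload;	/* value the GPU waits for */
};

/* Stand-in for add_sema_cmd(..., acquire == true): fill one slot. */
static void emit_acquire(uint32_t *buf, uint32_t off,
			 const struct fake_fence *f)
{
	buf[off + 0] = (uint32_t)(f->sema_gpu_va >> 32);	/* addr hi */
	buf[off + 1] = (uint32_t)f->sema_gpu_va;		/* addr lo */
	buf[off + 2] = f->payload;				/* wait value */
	/* words 3..7 stay zero: padding in this sketch */
}

int main(void)
{
	struct fake_fence fences[] = {
		{ .expired = 0, .sema_gpu_va = 0x100000, .payload = 7 },
		{ .expired = 1 },	/* mirrors the "expired" branch */
		{ .expired = 0, .sema_gpu_va = 0x200000, .payload = 9 },
	};
	int num_wait_cmds = (int)(sizeof(fences) / sizeof(fences[0]));
	/* one allocation sized for every fence, as in the native path */
	uint32_t *cmds = calloc((size_t)num_wait_cmds * WAIT_CMD_SIZE,
				sizeof(*cmds));
	int i;

	if (!cmds)
		return 1;

	for (i = 0; i < num_wait_cmds; i++) {
		uint32_t off = (uint32_t)i * WAIT_CMD_SIZE;

		if (fences[i].expired)
			/* analogous to the nvgpu_memset() no-op fill */
			memset(&cmds[off], 0, WAIT_CMD_SIZE * sizeof(*cmds));
		else
			emit_acquire(cmds, off, &fences[i]);
	}

	printf("slot 1 word 0 = %#x (no-op slot for expired fence)\n",
	       (unsigned)cmds[WAIT_CMD_SIZE]);
	free(cmds);
	return 0;
}

The fixed slot size is what motivates changing add_sema_cmd() to take a word offset instead of a command size in the first hunk: callers can now address slot i of a multi-command buffer directly.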
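The proxy path keeps one subtle race, described in the rewritten comment in the third hunk: sync_fence_wait_async() can report that the fence signaled between the status check and the waiter registration (return value 1), and then the release the waiter would have done must happen inline, or the GPU-side acquire never completes. A toy illustration of that contract, again with invented names:

/*
 * Sketch of the race handled at the end of semaphore_wait_fd_proxy():
 * registering a waiter on an already-signaled fence returns 1, and the
 * caller must perform the release itself since the callback never fires.
 */
#include <stdatomic.h>
#include <stdio.h>

struct toy_fence {
	atomic_int signaled;	/* 0 = pending, 1 = signaled */
	void (*cb)(void *);	/* registered waiter callback */
	void *data;
};

/* Returns 0 if the callback was registered, 1 if already signaled. */
static int toy_wait_async(struct toy_fence *f, void (*cb)(void *), void *data)
{
	if (atomic_load(&f->signaled))
		return 1;	/* waiter skipped; caller must release */
	f->cb = cb;
	f->data = data;
	return 0;
}

static void release_sema(void *data)
{
	printf("releasing GPU semaphore for %s\n", (const char *)data);
}

int main(void)
{
	struct toy_fence f = { .signaled = 1 };	/* lost the race */
	int err = toy_wait_async(&f, release_sema, "job 42");

	if (err == 1)		/* mirrors the "if (err == 1)" branch */
		release_sema("job 42");
	return 0;
}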