diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-03-22 07:19:34 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-23 20:18:15 -0400 |
commit | bac51e8081a2ae778bfdccc1c57ee3ef382de077 (patch) | |
tree | 47fcde5793bd51ed858aa1dcfb3e9f8d082fbe8f | |
parent | 90925a739ab80a93c6a325c12a61752c24339089 (diff) |
gpu: nvgpu: allow syncfds as prefences on deterministic
Accept submits on deterministic channels even when the prefence is a
syncfd, but only if it has just one fence inside.
Because NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE is shared between pre- and
postfences, however, a postfence (SUBMIT_GPFIFO_FLAGS_FENCE_GET) is still
not allowed at the same time.
The sync framework is problematic for deterministic channels due to
certain allocations that are not controlled by nvgpu. However, that only
applies for postfences, yet we've disallowed FLAGS_SYNC_FENCE for
deterministic channels even when a postfence is not needed.
Bug 200390539
Change-Id: I099bbadc11cc2f093fb2c585f3bd909143238d57
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680271
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/channel.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 |
3 files changed, 19 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 1d4afcc8..07bb393e 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c | |||
@@ -469,6 +469,8 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
469 | * this condition. | 469 | * this condition. |
470 | */ | 470 | */ |
471 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 471 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
472 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
473 | |||
472 | if (!pre_alloc_enabled) | 474 | if (!pre_alloc_enabled) |
473 | job->wait_cmd = nvgpu_kzalloc(g, | 475 | job->wait_cmd = nvgpu_kzalloc(g, |
474 | sizeof(struct priv_cmd_entry)); | 476 | sizeof(struct priv_cmd_entry)); |
@@ -481,7 +483,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
481 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | 483 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { |
482 | wait_fence_fd = fence->id; | 484 | wait_fence_fd = fence->id; |
483 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | 485 | err = c->sync->wait_fd(c->sync, wait_fence_fd, |
484 | job->wait_cmd); | 486 | job->wait_cmd, max_wait_cmds); |
485 | } else { | 487 | } else { |
486 | err = c->sync->wait_syncpt(c->sync, fence->id, | 488 | err = c->sync->wait_syncpt(c->sync, fence->id, |
487 | fence->value, | 489 | fence->value, |
@@ -758,8 +760,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
758 | need_sync_framework = force_need_sync_fence || | 760 | need_sync_framework = force_need_sync_fence || |
759 | gk20a_channel_sync_needs_sync_framework(g) || | 761 | gk20a_channel_sync_needs_sync_framework(g) || |
760 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && | 762 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && |
761 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || | 763 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET); |
762 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); | ||
763 | 764 | ||
764 | /* | 765 | /* |
765 | * Deferred clean-up is necessary for any of the following | 766 | * Deferred clean-up is necessary for any of the following |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 236ddaaf..25c57681 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -83,7 +83,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
83 | } | 83 | } |
84 | 84 | ||
85 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | 85 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, |
86 | struct priv_cmd_entry *wait_cmd) | 86 | struct priv_cmd_entry *wait_cmd, int max_wait_cmds) |
87 | { | 87 | { |
88 | #ifdef CONFIG_SYNC | 88 | #ifdef CONFIG_SYNC |
89 | int i; | 89 | int i; |
@@ -101,6 +101,11 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
101 | if (!sync_fence) | 101 | if (!sync_fence) |
102 | return -EINVAL; | 102 | return -EINVAL; |
103 | 103 | ||
104 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { | ||
105 | sync_fence_put(sync_fence); | ||
106 | return -EINVAL; | ||
107 | } | ||
108 | |||
104 | /* validate syncpt ids */ | 109 | /* validate syncpt ids */ |
105 | for (i = 0; i < sync_fence->num_fences; i++) { | 110 | for (i = 0; i < sync_fence->num_fences; i++) { |
106 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); | 111 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); |
@@ -473,7 +478,7 @@ static int gk20a_channel_semaphore_wait_syncpt( | |||
473 | 478 | ||
474 | #ifdef CONFIG_SYNC | 479 | #ifdef CONFIG_SYNC |
475 | static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, | 480 | static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, |
476 | struct priv_cmd_entry *wait_cmd) | 481 | struct priv_cmd_entry *wait_cmd, int max_wait_cmds) |
477 | { | 482 | { |
478 | struct sync_fence *sync_fence; | 483 | struct sync_fence *sync_fence; |
479 | int err; | 484 | int err; |
@@ -491,6 +496,11 @@ static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, | |||
491 | goto put_fence; | 496 | goto put_fence; |
492 | } | 497 | } |
493 | 498 | ||
499 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { | ||
500 | err = -EINVAL; | ||
501 | goto put_fence; | ||
502 | } | ||
503 | |||
494 | err = gk20a_channel_alloc_priv_cmdbuf(c, | 504 | err = gk20a_channel_alloc_priv_cmdbuf(c, |
495 | wait_cmd_size * num_wait_cmds, | 505 | wait_cmd_size * num_wait_cmds, |
496 | wait_cmd); | 506 | wait_cmd); |
@@ -526,14 +536,13 @@ put_fence: | |||
526 | 536 | ||
527 | static int gk20a_channel_semaphore_wait_fd( | 537 | static int gk20a_channel_semaphore_wait_fd( |
528 | struct gk20a_channel_sync *s, int fd, | 538 | struct gk20a_channel_sync *s, int fd, |
529 | struct priv_cmd_entry *entry) | 539 | struct priv_cmd_entry *entry, int max_wait_cmds) |
530 | { | 540 | { |
531 | struct gk20a_channel_semaphore *sema = | 541 | struct gk20a_channel_semaphore *sema = |
532 | container_of(s, struct gk20a_channel_semaphore, ops); | 542 | container_of(s, struct gk20a_channel_semaphore, ops); |
533 | struct channel_gk20a *c = sema->c; | 543 | struct channel_gk20a *c = sema->c; |
534 | #ifdef CONFIG_SYNC | 544 | #ifdef CONFIG_SYNC |
535 | 545 | return semaphore_wait_fd_native(c, fd, entry, max_wait_cmds); | |
536 | return semaphore_wait_fd_native(c, fd, entry); | ||
537 | #else | 546 | #else |
538 | nvgpu_err(c->g, | 547 | nvgpu_err(c->g, |
539 | "trying to use sync fds with CONFIG_SYNC disabled"); | 548 | "trying to use sync fds with CONFIG_SYNC disabled"); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index da8cb251..adbecbe1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -46,7 +46,7 @@ struct gk20a_channel_sync { | |||
46 | * Returns a gpu cmdbuf that performs the wait when executed | 46 | * Returns a gpu cmdbuf that performs the wait when executed |
47 | */ | 47 | */ |
48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, | 48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, |
49 | struct priv_cmd_entry *entry); | 49 | struct priv_cmd_entry *entry, int max_wait_cmds); |
50 | 50 | ||
51 | /* Increment syncpoint/semaphore. | 51 | /* Increment syncpoint/semaphore. |
52 | * Returns | 52 | * Returns |