diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-03-22 07:19:34 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-23 20:18:15 -0400 |
commit | bac51e8081a2ae778bfdccc1c57ee3ef382de077 (patch) | |
tree | 47fcde5793bd51ed858aa1dcfb3e9f8d082fbe8f | |
parent | 90925a739ab80a93c6a325c12a61752c24339089 (diff) |
gpu: nvgpu: allow syncfds as prefences on deterministic
Accept submits on deterministic channels even when the prefence is a
syncfd, but only if it has just one fence inside.
Because NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE is shared between pre- and
postfences, however, a postfence (SUBMIT_GPFIFO_FLAGS_FENCE_GET) is still
not allowed at the same time.
The sync framework is problematic for deterministic channels due to
certain allocations that are not controlled by nvgpu. However, that only
applies for postfences, yet we've disallowed FLAGS_SYNC_FENCE for
deterministic channels even when a postfence is not needed.
Bug 200390539
Change-Id: I099bbadc11cc2f093fb2c585f3bd909143238d57
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680271
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/channel.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 |
3 files changed, 19 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 1d4afcc8..07bb393e 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c | |||
@@ -469,6 +469,8 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
469 | * this condition. | 469 | * this condition. |
470 | */ | 470 | */ |
471 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 471 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
472 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
473 | |||
472 | if (!pre_alloc_enabled) | 474 | if (!pre_alloc_enabled) |
473 | job->wait_cmd = nvgpu_kzalloc(g, | 475 | job->wait_cmd = nvgpu_kzalloc(g, |
474 | sizeof(struct priv_cmd_entry)); | 476 | sizeof(struct priv_cmd_entry)); |
@@ -481,7 +483,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
481 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | 483 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { |
482 | wait_fence_fd = fence->id; | 484 | wait_fence_fd = fence->id; |
483 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | 485 | err = c->sync->wait_fd(c->sync, wait_fence_fd, |
484 | job->wait_cmd); | 486 | job->wait_cmd, max_wait_cmds); |
485 | } else { | 487 | } else { |
486 | err = c->sync->wait_syncpt(c->sync, fence->id, | 488 | err = c->sync->wait_syncpt(c->sync, fence->id, |
487 | fence->value, | 489 | fence->value, |
@@ -758,8 +760,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
758 | need_sync_framework = force_need_sync_fence || | 760 | need_sync_framework = force_need_sync_fence || |
759 | gk20a_channel_sync_needs_sync_framework(g) || | 761 | gk20a_channel_sync_needs_sync_framework(g) || |
760 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && | 762 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && |
761 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || | 763 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET); |
762 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); | ||
763 | 764 | ||
764 | /* | 765 | /* |
765 | * Deferred clean-up is necessary for any of the following | 766 | * Deferred clean-up is necessary for any of the following |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 236ddaaf..25c57681 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -83,7 +83,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
83 | } | 83 | } |
84 | 84 | ||
85 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | 85 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, |
86 | struct priv_cmd_entry *wait_cmd) | 86 | struct priv_cmd_entry *wait_cmd, int max_wait_cmds) |
87 | { | 87 | { |
88 | #ifdef CONFIG_SYNC | 88 | #ifdef CONFIG_SYNC |
89 | int i; | 89 | int i; |
@@ -101,6 +101,11 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
101 | if (!sync_fence) | 101 | if (!sync_fence) |
102 | return -EINVAL; | 102 | return -EINVAL; |
103 | 103 | ||
104 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { | ||
105 | sync_fence_put(sync_fence); | ||
106 | return -EINVAL; | ||
107 | } | ||
108 | |||
104 | /* validate syncpt ids */ | 109 | /* validate syncpt ids */ |
105 | for (i = 0; i < sync_fence->num_fences; i++) { | 110 | for (i = 0; i < sync_fence->num_fences; i++) { |
106 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); | 111 | pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); |
@@ -473,7 +478,7 @@ static int gk20a_channel_semaphore_wait_syncpt( | |||
473 | 478 | ||
474 | #ifdef CONFIG_SYNC | 479 | #ifdef CONFIG_SYNC |
475 | static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, | 480 | static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, |
476 | struct priv_cmd_entry *wait_cmd) | 481 | struct priv_cmd_entry *wait_cmd, int max_wait_cmds) |
477 | { | 482 | { |
478 | struct sync_fence *sync_fence; | 483 | struct sync_fence *sync_fence; |
479 | int err; | 484 | int err; |
@@ -491,6 +496,11 @@ static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd, | |||
491 | goto put_fence; | 496 | goto put_fence; |
492 | } | 497 | } |
493 | 498 | ||
499 | if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { | ||
500 | err = -EINVAL; | ||
501 | goto put_fence; | ||
502 | } | ||
503 | |||
494 | err = gk20a_channel_alloc_priv_cmdbuf(c, | 504 | err = gk20a_channel_alloc_priv_cmdbuf(c, |
495 | wait_cmd_size * num_wait_cmds, | 505 | wait_cmd_size * num_wait_cmds, |
496 | wait_cmd); | 506 | wait_cmd); |
@@ -526,14 +536,13 @@ put_fence: | |||
526 | 536 | ||
527 | static int gk20a_channel_semaphore_wait_fd( | 537 | static int gk20a_channel_semaphore_wait_fd( |
528 | struct gk20a_channel_sync *s, int fd, | 538 | struct gk20a_channel_sync *s, int fd, |
529 | struct priv_cmd_entry *entry) | 539 | struct priv_cmd_entry *entry, int max_wait_cmds) |
530 | { | 540 | { |
531 | struct gk20a_channel_semaphore *sema = | 541 | struct gk20a_channel_semaphore *sema = |
532 | container_of(s, struct gk20a_channel_semaphore, ops); | 542 | container_of(s, struct gk20a_channel_semaphore, ops); |
533 | struct channel_gk20a *c = sema->c; | 543 | struct channel_gk20a *c = sema->c; |
534 | #ifdef CONFIG_SYNC | 544 | #ifdef CONFIG_SYNC |
535 | 545 | return semaphore_wait_fd_native(c, fd, entry, max_wait_cmds); | |
536 | return semaphore_wait_fd_native(c, fd, entry); | ||
537 | #else | 546 | #else |
538 | nvgpu_err(c->g, | 547 | nvgpu_err(c->g, |
539 | "trying to use sync fds with CONFIG_SYNC disabled"); | 548 | "trying to use sync fds with CONFIG_SYNC disabled"); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index da8cb251..adbecbe1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -46,7 +46,7 @@ struct gk20a_channel_sync { | |||
46 | * Returns a gpu cmdbuf that performs the wait when executed | 46 | * Returns a gpu cmdbuf that performs the wait when executed |
47 | */ | 47 | */ |
48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, | 48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, |
49 | struct priv_cmd_entry *entry); | 49 | struct priv_cmd_entry *entry, int max_wait_cmds); |
50 | 50 | ||
51 | /* Increment syncpoint/semaphore. | 51 | /* Increment syncpoint/semaphore. |
52 | * Returns | 52 | * Returns |