diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2018-02-21 02:49:37 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-02-26 06:48:14 -0500 |
commit | 0c46f8a5e112c08c172ee2c692832e1753ffbcce (patch) | |
tree | fae2114e3596507554fda4e35a8942b509017d80 /drivers/gpu | |
parent | 8d5536271f989e01018a543016340a3d76a2fae2 (diff) |
gpu: nvgpu: support user fence updates
Add support for user fence updates, i.e. increments added by user space
directly in the pushbuffer.
Add a submit IOCTL flag, NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE, to indicate
whether user space has added increments to the pushbuffer.
If so, the number of increments is received from user space in fence.value.
If user space is adding increments to the pushbuffer, then we don't need to do
any job tracking in the kernel.
So fail the submit if need_job_tracking evaluates to true while
FLAGS_USER_FENCE_UPDATE is set.
User space is responsible for ensuring that all prerequisites for a fast submit
are met and for preventing kernel job tracking.
Since user space adds the increments in the pushbuffer, the kernel only handles
the threshold bookkeeping.
Bug 200326065
Jira NVGPU-179
Change-Id: Ic0f0b1aa69e3389a4c3305fb6a559c5113719e0f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1661854
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/channel.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 3 |
3 files changed, 39 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index a725cd6b..35fb3023 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c | |||
@@ -720,7 +720,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
720 | return -EINVAL; | 720 | return -EINVAL; |
721 | 721 | ||
722 | if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | | 722 | if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | |
723 | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && | 723 | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET | |
724 | NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) && | ||
724 | !fence) | 725 | !fence) |
725 | return -EINVAL; | 726 | return -EINVAL; |
726 | 727 | ||
@@ -757,6 +758,16 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
757 | (g->can_railgate && !c->deterministic) || | 758 | (g->can_railgate && !c->deterministic) || |
758 | !skip_buffer_refcounting; | 759 | !skip_buffer_refcounting; |
759 | 760 | ||
761 | /* | ||
762 | * If User is adding increments to the pushbuffer and doing all job | ||
763 | * tracking, then no need for kernel tracking here | ||
764 | * User should ensure that all pre-requisites for fast submit are met | ||
765 | * Fail the submit if that's not the case | ||
766 | */ | ||
767 | if (need_job_tracking && | ||
768 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) | ||
769 | return -EINVAL; | ||
770 | |||
760 | if (need_job_tracking) { | 771 | if (need_job_tracking) { |
761 | bool need_sync_framework = false; | 772 | bool need_sync_framework = false; |
762 | 773 | ||
@@ -868,6 +879,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
868 | goto clean_up; | 879 | goto clean_up; |
869 | } | 880 | } |
870 | 881 | ||
882 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE) { | ||
883 | /* | ||
884 | * User space adds increments in the pushbuffer, so just | ||
885 | * handle the threshold book keeping in kernel by adding | ||
886 | * number of syncpoint increments to threshold | ||
887 | */ | ||
888 | c->sync->add_user_incrs(c->sync, fence->value); | ||
889 | } | ||
890 | |||
871 | if (need_job_tracking) { | 891 | if (need_job_tracking) { |
872 | err = channel_gk20a_alloc_job(c, &job); | 892 | err = channel_gk20a_alloc_job(c, &job); |
873 | if (err) | 893 | if (err) |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index e965a329..18d61faa 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -301,6 +301,13 @@ static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s) | |||
301 | return sp->syncpt_buf.gpu_va; | 301 | return sp->syncpt_buf.gpu_va; |
302 | } | 302 | } |
303 | 303 | ||
304 | static u32 gk20a_channel_add_user_incrs(struct gk20a_channel_sync *s, u32 val) | ||
305 | { | ||
306 | struct gk20a_channel_syncpt *sp = | ||
307 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
308 | return nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, val); | ||
309 | } | ||
310 | |||
304 | static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) | 311 | static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) |
305 | { | 312 | { |
306 | struct gk20a_channel_syncpt *sp = | 313 | struct gk20a_channel_syncpt *sp = |
@@ -353,6 +360,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) | |||
353 | sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; | 360 | sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; |
354 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; | 361 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; |
355 | sp->ops.syncpt_address = gk20a_channel_syncpt_address; | 362 | sp->ops.syncpt_address = gk20a_channel_syncpt_address; |
363 | sp->ops.add_user_incrs = gk20a_channel_add_user_incrs; | ||
356 | sp->ops.destroy = gk20a_channel_syncpt_destroy; | 364 | sp->ops.destroy = gk20a_channel_syncpt_destroy; |
357 | 365 | ||
358 | return &sp->ops; | 366 | return &sp->ops; |
@@ -878,6 +886,12 @@ static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s) | |||
878 | return 0; | 886 | return 0; |
879 | } | 887 | } |
880 | 888 | ||
889 | static u32 gk20a_channel_semaphore_add_user_incrs(struct gk20a_channel_sync *s, | ||
890 | u32 val) | ||
891 | { | ||
892 | return 0; | ||
893 | } | ||
894 | |||
881 | static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) | 895 | static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) |
882 | { | 896 | { |
883 | struct gk20a_channel_semaphore *sema = | 897 | struct gk20a_channel_semaphore *sema = |
@@ -930,6 +944,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) | |||
930 | sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; | 944 | sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; |
931 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; | 945 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; |
932 | sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; | 946 | sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; |
947 | sema->ops.add_user_incrs = gk20a_channel_semaphore_add_user_incrs; | ||
933 | sema->ops.destroy = gk20a_channel_semaphore_destroy; | 948 | sema->ops.destroy = gk20a_channel_semaphore_destroy; |
934 | 949 | ||
935 | return &sema->ops; | 950 | return &sema->ops; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index fe1d8526..c80ebd38 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -105,6 +105,9 @@ struct gk20a_channel_sync { | |||
105 | /* Returns the sync point address of sync point or 0 if not supported */ | 105 | /* Returns the sync point address of sync point or 0 if not supported */ |
106 | u64 (*syncpt_address)(struct gk20a_channel_sync *s); | 106 | u64 (*syncpt_address)(struct gk20a_channel_sync *s); |
107 | 107 | ||
108 | /* Handle user added increments in the push buffer */ | ||
109 | u32 (*add_user_incrs)(struct gk20a_channel_sync *s, u32 val); | ||
110 | |||
108 | /* Free the resources allocated by gk20a_channel_sync_create. */ | 111 | /* Free the resources allocated by gk20a_channel_sync_create. */ |
109 | void (*destroy)(struct gk20a_channel_sync *s); | 112 | void (*destroy)(struct gk20a_channel_sync *s); |
110 | }; | 113 | }; |