summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Deepak Nibade <dnibade@nvidia.com> 2018-02-21 02:49:37 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com> 2018-02-26 06:48:14 -0500
commit 0c46f8a5e112c08c172ee2c692832e1753ffbcce (patch)
tree fae2114e3596507554fda4e35a8942b509017d80
parent 8d5536271f989e01018a543016340a3d76a2fae2 (diff)
gpu: nvgpu: support user fence updates
Add support for user fence updates, i.e. increments added by user space directly in the pushbuffer.

Add a submit IOCTL flag, NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE, to indicate whether the user has added increments in the pushbuffer. If so, the number_of_increment value is received from the user in fence.value.

If the user is adding increments in the pushbuffer, then we don't need to do any job tracking in the kernel. So fail the submit if we evaluate need_job_tracking to true and FLAGS_USER_FENCE_UPDATE is set. The user is responsible for ensuring all pre-requisites for a fast submit are met, and for preventing kernel job tracking.

Since user space adds the increments in the pushbuffer, just handle the threshold bookkeeping in the kernel.

Bug 200326065
Jira NVGPU-179

Change-Id: Ic0f0b1aa69e3389a4c3305fb6a559c5113719e0f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1661854
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/linux/channel.c 22
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 15
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h 3
-rw-r--r-- include/uapi/linux/nvgpu.h 2
4 files changed, 41 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index a725cd6b..35fb3023 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -720,7 +720,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
720 return -EINVAL; 720 return -EINVAL;
721 721
722 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | 722 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
723 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && 723 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET |
724 NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) &&
724 !fence) 725 !fence)
725 return -EINVAL; 726 return -EINVAL;
726 727
@@ -757,6 +758,16 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
757 (g->can_railgate && !c->deterministic) || 758 (g->can_railgate && !c->deterministic) ||
758 !skip_buffer_refcounting; 759 !skip_buffer_refcounting;
759 760
761 /*
762 * If User is adding increments to the pushbuffer and doing all job
763 * tracking, then no need for kernel tracking here
764 * User should ensure that all pre-requisites for fast submit are met
765 * Fail the submit if that's not the case
766 */
767 if (need_job_tracking &&
768 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE))
769 return -EINVAL;
770
760 if (need_job_tracking) { 771 if (need_job_tracking) {
761 bool need_sync_framework = false; 772 bool need_sync_framework = false;
762 773
@@ -868,6 +879,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
868 goto clean_up; 879 goto clean_up;
869 } 880 }
870 881
882 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE) {
883 /*
884 * User space adds increments in the pushbuffer, so just
885 * handle the threshold book keeping in kernel by adding
886 * number of syncpoint increments to threshold
887 */
888 c->sync->add_user_incrs(c->sync, fence->value);
889 }
890
871 if (need_job_tracking) { 891 if (need_job_tracking) {
872 err = channel_gk20a_alloc_job(c, &job); 892 err = channel_gk20a_alloc_job(c, &job);
873 if (err) 893 if (err)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index e965a329..18d61faa 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -301,6 +301,13 @@ static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s)
301 return sp->syncpt_buf.gpu_va; 301 return sp->syncpt_buf.gpu_va;
302} 302}
303 303
304static u32 gk20a_channel_add_user_incrs(struct gk20a_channel_sync *s, u32 val)
305{
306 struct gk20a_channel_syncpt *sp =
307 container_of(s, struct gk20a_channel_syncpt, ops);
308 return nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, val);
309}
310
304static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) 311static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
305{ 312{
306 struct gk20a_channel_syncpt *sp = 313 struct gk20a_channel_syncpt *sp =
@@ -353,6 +360,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
353 sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; 360 sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
354 sp->ops.syncpt_id = gk20a_channel_syncpt_id; 361 sp->ops.syncpt_id = gk20a_channel_syncpt_id;
355 sp->ops.syncpt_address = gk20a_channel_syncpt_address; 362 sp->ops.syncpt_address = gk20a_channel_syncpt_address;
363 sp->ops.add_user_incrs = gk20a_channel_add_user_incrs;
356 sp->ops.destroy = gk20a_channel_syncpt_destroy; 364 sp->ops.destroy = gk20a_channel_syncpt_destroy;
357 365
358 return &sp->ops; 366 return &sp->ops;
@@ -878,6 +886,12 @@ static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s)
878 return 0; 886 return 0;
879} 887}
880 888
889static u32 gk20a_channel_semaphore_add_user_incrs(struct gk20a_channel_sync *s,
890 u32 val)
891{
892 return 0;
893}
894
881static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) 895static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
882{ 896{
883 struct gk20a_channel_semaphore *sema = 897 struct gk20a_channel_semaphore *sema =
@@ -930,6 +944,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
930 sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; 944 sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
931 sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; 945 sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
932 sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; 946 sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address;
947 sema->ops.add_user_incrs = gk20a_channel_semaphore_add_user_incrs;
933 sema->ops.destroy = gk20a_channel_semaphore_destroy; 948 sema->ops.destroy = gk20a_channel_semaphore_destroy;
934 949
935 return &sema->ops; 950 return &sema->ops;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index fe1d8526..c80ebd38 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -105,6 +105,9 @@ struct gk20a_channel_sync {
105 /* Returns the sync point address of sync point or 0 if not supported */ 105 /* Returns the sync point address of sync point or 0 if not supported */
106 u64 (*syncpt_address)(struct gk20a_channel_sync *s); 106 u64 (*syncpt_address)(struct gk20a_channel_sync *s);
107 107
108 /* Handle user added increments in the push buffer */
109 u32 (*add_user_incrs)(struct gk20a_channel_sync *s, u32 val);
110
108 /* Free the resources allocated by gk20a_channel_sync_create. */ 111 /* Free the resources allocated by gk20a_channel_sync_create. */
109 void (*destroy)(struct gk20a_channel_sync *s); 112 void (*destroy)(struct gk20a_channel_sync *s);
110}; 113};
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index cf75595a..18168158 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1478,6 +1478,8 @@ struct nvgpu_fence {
1478#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) 1478#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
1479/* expire current timeslice and reschedule runlist from front */ 1479/* expire current timeslice and reschedule runlist from front */
1480#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6) 1480#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
1481/* user space has added syncpoint increments in the pushbuffer */
1482#define NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE (1 << 7)
1481 1483
1482struct nvgpu_submit_gpfifo_args { 1484struct nvgpu_submit_gpfifo_args {
1483 __u64 gpfifo; 1485 __u64 gpfifo;