summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/common/linux/channel.c 22
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 15
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h 3
-rw-r--r-- include/uapi/linux/nvgpu.h 2
4 files changed, 41 insertions, 1 deletion
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index a725cd6b..35fb3023 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -720,7 +720,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
720 return -EINVAL; 720 return -EINVAL;
721 721
722 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | 722 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
723 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && 723 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET |
724 NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) &&
724 !fence) 725 !fence)
725 return -EINVAL; 726 return -EINVAL;
726 727
@@ -757,6 +758,16 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
757 (g->can_railgate && !c->deterministic) || 758 (g->can_railgate && !c->deterministic) ||
758 !skip_buffer_refcounting; 759 !skip_buffer_refcounting;
759 760
761 /*
762 * If User is adding increments to the pushbuffer and doing all job
763 * tracking, then no need for kernel tracking here
764 * User should ensure that all pre-requisites for fast submit are met
765 * Fail the submit if that's not the case
766 */
767 if (need_job_tracking &&
768 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE))
769 return -EINVAL;
770
760 if (need_job_tracking) { 771 if (need_job_tracking) {
761 bool need_sync_framework = false; 772 bool need_sync_framework = false;
762 773
@@ -868,6 +879,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
868 goto clean_up; 879 goto clean_up;
869 } 880 }
870 881
882 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE) {
883 /*
884 * User space adds increments in the pushbuffer, so just
885 * handle the threshold book keeping in kernel by adding
886 * number of syncpoint increments to threshold
887 */
888 c->sync->add_user_incrs(c->sync, fence->value);
889 }
890
871 if (need_job_tracking) { 891 if (need_job_tracking) {
872 err = channel_gk20a_alloc_job(c, &job); 892 err = channel_gk20a_alloc_job(c, &job);
873 if (err) 893 if (err)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index e965a329..18d61faa 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -301,6 +301,13 @@ static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s)
301 return sp->syncpt_buf.gpu_va; 301 return sp->syncpt_buf.gpu_va;
302} 302}
303 303
304static u32 gk20a_channel_add_user_incrs(struct gk20a_channel_sync *s, u32 val)
305{
306 struct gk20a_channel_syncpt *sp =
307 container_of(s, struct gk20a_channel_syncpt, ops);
308 return nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, val);
309}
310
304static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) 311static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
305{ 312{
306 struct gk20a_channel_syncpt *sp = 313 struct gk20a_channel_syncpt *sp =
@@ -353,6 +360,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
353 sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; 360 sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
354 sp->ops.syncpt_id = gk20a_channel_syncpt_id; 361 sp->ops.syncpt_id = gk20a_channel_syncpt_id;
355 sp->ops.syncpt_address = gk20a_channel_syncpt_address; 362 sp->ops.syncpt_address = gk20a_channel_syncpt_address;
363 sp->ops.add_user_incrs = gk20a_channel_add_user_incrs;
356 sp->ops.destroy = gk20a_channel_syncpt_destroy; 364 sp->ops.destroy = gk20a_channel_syncpt_destroy;
357 365
358 return &sp->ops; 366 return &sp->ops;
@@ -878,6 +886,12 @@ static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s)
878 return 0; 886 return 0;
879} 887}
880 888
889static u32 gk20a_channel_semaphore_add_user_incrs(struct gk20a_channel_sync *s,
890 u32 val)
891{
892 return 0;
893}
894
881static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) 895static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
882{ 896{
883 struct gk20a_channel_semaphore *sema = 897 struct gk20a_channel_semaphore *sema =
@@ -930,6 +944,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
930 sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; 944 sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
931 sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; 945 sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
932 sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; 946 sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address;
947 sema->ops.add_user_incrs = gk20a_channel_semaphore_add_user_incrs;
933 sema->ops.destroy = gk20a_channel_semaphore_destroy; 948 sema->ops.destroy = gk20a_channel_semaphore_destroy;
934 949
935 return &sema->ops; 950 return &sema->ops;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index fe1d8526..c80ebd38 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -105,6 +105,9 @@ struct gk20a_channel_sync {
105 /* Returns the sync point address of sync point or 0 if not supported */ 105 /* Returns the sync point address of sync point or 0 if not supported */
106 u64 (*syncpt_address)(struct gk20a_channel_sync *s); 106 u64 (*syncpt_address)(struct gk20a_channel_sync *s);
107 107
108 /* Handle user added increments in the push buffer */
109 u32 (*add_user_incrs)(struct gk20a_channel_sync *s, u32 val);
110
108 /* Free the resources allocated by gk20a_channel_sync_create. */ 111 /* Free the resources allocated by gk20a_channel_sync_create. */
109 void (*destroy)(struct gk20a_channel_sync *s); 112 void (*destroy)(struct gk20a_channel_sync *s);
110}; 113};
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index cf75595a..18168158 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1478,6 +1478,8 @@ struct nvgpu_fence {
1478#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) 1478#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
1479/* expire current timeslice and reschedule runlist from front */ 1479/* expire current timeslice and reschedule runlist from front */
1480#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6) 1480#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
1481/* user space has added syncpoint increments in the pushbuffer */
1482#define NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE (1 << 7)
1481 1483
1482struct nvgpu_submit_gpfifo_args { 1484struct nvgpu_submit_gpfifo_args {
1483 __u64 gpfifo; 1485 __u64 gpfifo;