gpu: nvgpu: add support for pre-allocated resources

Add support for pre-allocation of job tracking resources with a new (extended) ioctl. The goal is to avoid dynamic memory allocation in the submit path. This patch does the following: 1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, which enables pre-allocation of tracking resources per job: a) 2x priv_cmd_entry b) 2x gk20a_fence 2) Implements a circular ring buffer for job tracking to avoid lock contention between producer (submitter) and consumer (clean-up). Bug 1795076 Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347 Signed-off-by: Sachit Kadle <skadle@nvidia.com> Reviewed-on: http://git-master/r/1203300 (cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39) Reviewed-on: http://git-master/r/1223934 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Sachit Kadle <skadle@nvidia.com> 2016-08-15 17:32:39 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-10-20 11:14:11 -0400
commit: 733fb79b39869665addcd80ccdf1c15f4a5aaa29 (patch)
tree: b3c9c1ba32406ed9c2af4bebee820e83ea6172e4 /drivers/gpu/nvgpu/gk20a/channel_gk20a.h
parent: 63e8592e06939e20c7b9e56b430353ebbee31ad6 (diff)
1 files changed, 25 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 0d8746b8..8cceb6b2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -70,6 +70,22 @@ struct channel_gk20a_job {
        struct list_head list;
 };
+struct channel_gk20a_joblist {
+        struct {
+                bool enabled;
+                unsigned int length;
+                unsigned int put;
+                unsigned int get;
+                struct channel_gk20a_job *jobs;
+                struct mutex read_lock;
+        } pre_alloc;
+        struct {
+                struct list_head jobs;
+                spinlock_t lock;
+        } dynamic;
+};
 struct channel_gk20a_timeout {
        struct delayed_work wq;
        raw_spinlock_t lock;
@@ -115,6 +131,7 @@ struct channel_gk20a {
        bool bound;
        bool first_init;
        bool vpr;
+        bool no_block;
        bool cde;
        pid_t pid;
        pid_t tgid;
@@ -123,8 +140,8 @@ struct channel_gk20a {
        int tsgid;
        struct list_head ch_entry; /* channel's entry in TSG */
-        struct list_head jobs;
+        struct channel_gk20a_joblist joblist;
-        spinlock_t jobs_lock;
+        struct gk20a_allocator fence_allocator;
        struct vm_gk20a *vm;
@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                bool force_need_sync_fence);
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-                               struct nvgpu_alloc_gpfifo_args *args);
+                               struct nvgpu_alloc_gpfifo_ex_args *args);
 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 void channel_gk20a_disable(struct channel_gk20a *ch);
@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 void channel_gk20a_enable(struct channel_gk20a *ch);
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
 int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
                int timeslice_period,
                int *__timeslice_timeout, int *__timeslice_scale);
author	Sachit Kadle <skadle@nvidia.com>	2016-08-15 17:32:39 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-10-20 11:14:11 -0400
commit	733fb79b39869665addcd80ccdf1c15f4a5aaa29 (patch)
tree	b3c9c1ba32406ed9c2af4bebee820e83ea6172e4 /drivers/gpu/nvgpu/gk20a/channel_gk20a.h
parent	63e8592e06939e20c7b9e56b430353ebbee31ad6 (diff)