From 733fb79b39869665addcd80ccdf1c15f4a5aaa29 Mon Sep 17 00:00:00 2001
From: Sachit Kadle <skadle@nvidia.com>
Date: Mon, 15 Aug 2016 14:32:39 -0700
Subject: gpu: nvgpu: add support for pre-allocated resources

Add support for pre-allocation of job tracking resources
w/ new (extended) ioctl. Goal is to avoid dynamic memory
allocation in the submit path. This patch does the following:

1) Intoduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX,
which enables pre-allocation of tracking resources per job:
a) 2x priv_cmd_entry
b) 2x gk20a_fence

2) Implements circular ring buffer for job
tracking to avoid lock contention between producer
(submitter) and consumer (clean-up)

Bug 1795076

Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 0d8746b8..8cceb6b2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -70,6 +70,22 @@ struct channel_gk20a_job {
 	struct list_head list;
 };
 
+struct channel_gk20a_joblist {
+	struct {
+		bool enabled;
+		unsigned int length;
+		unsigned int put;
+		unsigned int get;
+		struct channel_gk20a_job *jobs;
+		struct mutex read_lock;
+	} pre_alloc;
+
+	struct {
+		struct list_head jobs;
+		spinlock_t lock;
+	} dynamic;
+};
+
 struct channel_gk20a_timeout {
 	struct delayed_work wq;
 	raw_spinlock_t lock;
@@ -115,6 +131,7 @@ struct channel_gk20a {
 	bool bound;
 	bool first_init;
 	bool vpr;
+	bool no_block;
 	bool cde;
 	pid_t pid;
 	pid_t tgid;
@@ -123,8 +140,8 @@ struct channel_gk20a {
 	int tsgid;
 	struct list_head ch_entry; /* channel's entry in TSG */
 
-	struct list_head jobs;
-	spinlock_t jobs_lock;
+	struct channel_gk20a_joblist joblist;
+	struct gk20a_allocator fence_allocator;
 
 	struct vm_gk20a *vm;
 
@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				bool force_need_sync_fence);
 
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-			       struct nvgpu_alloc_gpfifo_args *args);
+			       struct nvgpu_alloc_gpfifo_ex_args *args);
 
 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 void channel_gk20a_disable(struct channel_gk20a *ch);
@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 void channel_gk20a_enable(struct channel_gk20a *ch);
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
 
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
+
 int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 		int timeslice_period,
 		int *__timeslice_timeout, int *__timeslice_scale);
-- 
cgit v1.2.2