From 733fb79b39869665addcd80ccdf1c15f4a5aaa29 Mon Sep 17 00:00:00 2001 From: Sachit Kadle Date: Mon, 15 Aug 2016 14:32:39 -0700 Subject: gpu: nvgpu: add support for pre-allocated resources Add support for pre-allocation of job tracking resources w/ new (extended) ioctl. Goal is to avoid dynamic memory allocation in the submit path. This patch does the following: 1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, which enables pre-allocation of tracking resources per job: a) 2x priv_cmd_entry b) 2x gk20a_fence 2) Implements circular ring buffer for job tracking to avoid lock contention between producer (submitter) and consumer (clean-up) Bug 1795076 Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347 Signed-off-by: Sachit Kadle Reviewed-on: http://git-master/r/1203300 (cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39) Reviewed-on: http://git-master/r/1223934 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 0d8746b8..8cceb6b2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -70,6 +70,22 @@ struct channel_gk20a_job { struct list_head list; }; +struct channel_gk20a_joblist { + struct { + bool enabled; + unsigned int length; + unsigned int put; + unsigned int get; + struct channel_gk20a_job *jobs; + struct mutex read_lock; + } pre_alloc; + + struct { + struct list_head jobs; + spinlock_t lock; + } dynamic; +}; + struct channel_gk20a_timeout { struct delayed_work wq; raw_spinlock_t lock; @@ -115,6 +131,7 @@ struct channel_gk20a { bool bound; bool first_init; bool vpr; + bool no_block; bool cde; pid_t pid; pid_t tgid; @@ -123,8 +140,8 @@ struct channel_gk20a { int tsgid; struct list_head ch_entry; /* 
channel's entry in TSG */ - struct list_head jobs; - spinlock_t jobs_lock; + struct channel_gk20a_joblist joblist; + struct gk20a_allocator fence_allocator; struct vm_gk20a *vm; @@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, bool force_need_sync_fence); int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, - struct nvgpu_alloc_gpfifo_args *args); + struct nvgpu_alloc_gpfifo_ex_args *args); void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); void channel_gk20a_disable(struct channel_gk20a *ch); @@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, void channel_gk20a_enable(struct channel_gk20a *ch); void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); +bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c); +void channel_gk20a_joblist_lock(struct channel_gk20a *c); +void channel_gk20a_joblist_unlock(struct channel_gk20a *c); +bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c); + int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, int timeslice_period, int *__timeslice_timeout, int *__timeslice_scale); -- cgit v1.2.2