diff options
author | Sachit Kadle <skadle@nvidia.com> | 2016-08-15 17:32:39 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-20 11:14:11 -0400 |
commit | 733fb79b39869665addcd80ccdf1c15f4a5aaa29 (patch) | |
tree | b3c9c1ba32406ed9c2af4bebee820e83ea6172e4 /drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |
parent | 63e8592e06939e20c7b9e56b430353ebbee31ad6 (diff) |
gpu: nvgpu: add support for pre-allocated resources
Add support for pre-allocation of job tracking resources
with a new (extended) ioctl. The goal is to avoid dynamic memory
allocation in the submit path. This patch does the following:
1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX,
which enables pre-allocation of tracking resources per job:
a) 2x priv_cmd_entry
b) 2x gk20a_fence
2) Implements a circular ring buffer for job
tracking to avoid lock contention between producer
(submitter) and consumer (clean-up)
Bug 1795076
Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 28 |
1 files changed, 25 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 0d8746b8..8cceb6b2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -70,6 +70,22 @@ struct channel_gk20a_job { | |||
70 | struct list_head list; | 70 | struct list_head list; |
71 | }; | 71 | }; |
72 | 72 | ||
73 | struct channel_gk20a_joblist { | ||
74 | struct { | ||
75 | bool enabled; | ||
76 | unsigned int length; | ||
77 | unsigned int put; | ||
78 | unsigned int get; | ||
79 | struct channel_gk20a_job *jobs; | ||
80 | struct mutex read_lock; | ||
81 | } pre_alloc; | ||
82 | |||
83 | struct { | ||
84 | struct list_head jobs; | ||
85 | spinlock_t lock; | ||
86 | } dynamic; | ||
87 | }; | ||
88 | |||
73 | struct channel_gk20a_timeout { | 89 | struct channel_gk20a_timeout { |
74 | struct delayed_work wq; | 90 | struct delayed_work wq; |
75 | raw_spinlock_t lock; | 91 | raw_spinlock_t lock; |
@@ -115,6 +131,7 @@ struct channel_gk20a { | |||
115 | bool bound; | 131 | bool bound; |
116 | bool first_init; | 132 | bool first_init; |
117 | bool vpr; | 133 | bool vpr; |
134 | bool no_block; | ||
118 | bool cde; | 135 | bool cde; |
119 | pid_t pid; | 136 | pid_t pid; |
120 | pid_t tgid; | 137 | pid_t tgid; |
@@ -123,8 +140,8 @@ struct channel_gk20a { | |||
123 | int tsgid; | 140 | int tsgid; |
124 | struct list_head ch_entry; /* channel's entry in TSG */ | 141 | struct list_head ch_entry; /* channel's entry in TSG */ |
125 | 142 | ||
126 | struct list_head jobs; | 143 | struct channel_gk20a_joblist joblist; |
127 | spinlock_t jobs_lock; | 144 | struct gk20a_allocator fence_allocator; |
128 | 145 | ||
129 | struct vm_gk20a *vm; | 146 | struct vm_gk20a *vm; |
130 | 147 | ||
@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
272 | bool force_need_sync_fence); | 289 | bool force_need_sync_fence); |
273 | 290 | ||
274 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 291 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
275 | struct nvgpu_alloc_gpfifo_args *args); | 292 | struct nvgpu_alloc_gpfifo_ex_args *args); |
276 | 293 | ||
277 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); | 294 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); |
278 | void channel_gk20a_disable(struct channel_gk20a *ch); | 295 | void channel_gk20a_disable(struct channel_gk20a *ch); |
@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
284 | void channel_gk20a_enable(struct channel_gk20a *ch); | 301 | void channel_gk20a_enable(struct channel_gk20a *ch); |
285 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); | 302 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); |
286 | 303 | ||
304 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c); | ||
305 | void channel_gk20a_joblist_lock(struct channel_gk20a *c); | ||
306 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c); | ||
307 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c); | ||
308 | |||
287 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | 309 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, |
288 | int timeslice_period, | 310 | int timeslice_period, |
289 | int *__timeslice_timeout, int *__timeslice_scale); | 311 | int *__timeslice_timeout, int *__timeslice_scale); |