summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
author: Aingara Paramakuru <aparamakuru@nvidia.com>, 2016-08-17 01:28:30 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>, 2016-09-19 15:15:21 -0400
commit: 3180ed70489113365203abc049223ad5956cb22e (patch)
tree: f5ac680b0afd9bce25379e867dd04b7c6008dd2f /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent: 4ae85fd8c9bc24cac0ab2da7401df1f4cfc7a143 (diff)
gpu: nvgpu: enable fast submit path
Submit job-tracking is necessary for any of the following conditions:
- pre- or post-fence functionality
- channel wdt
- GPU rail-gating
- buffer refcounting

If none of the conditions are met, then job tracking is not required and a fast submit can be done (i.e. only need to write out userspace GPFIFO entries and update GP_PUT).

Bug 1795076

Change-Id: If94d195e3a18a6b623e167829d291ec98a7a43a1
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/1203511
(cherry picked from commit 13d7cfe94559dc52cb0bba7f9e48848e0858be81)
Reviewed-on: http://git-master/r/1223066
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 62
1 file changed, 43 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c8f8c6ce..7df794bf 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2201,8 +2201,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2201 bool skip_buffer_refcounting = (flags & 2201 bool skip_buffer_refcounting = (flags &
2202 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); 2202 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
2203 int err = 0; 2203 int err = 0;
2204 bool need_job_tracking;
2204 struct nvgpu_gpfifo __user *user_gpfifo = args ? 2205 struct nvgpu_gpfifo __user *user_gpfifo = args ?
2205 (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL; 2206 (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
2207 struct gk20a_platform *platform = gk20a_get_platform(d);
2206 2208
2207 if (c->has_timedout) 2209 if (c->has_timedout)
2208 return -ETIMEDOUT; 2210 return -ETIMEDOUT;
@@ -2240,11 +2242,30 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2240 2242
2241 gk20a_dbg_info("channel %d", c->hw_chid); 2243 gk20a_dbg_info("channel %d", c->hw_chid);
2242 2244
2243 /* gk20a_channel_update releases this ref. */ 2245 /*
2244 err = gk20a_busy(g->dev); 2246 * Job tracking is necessary for any of the following conditions:
2245 if (err) { 2247 * - pre- or post-fence functionality
2246 gk20a_err(d, "failed to host gk20a to submit gpfifo"); 2248 * - channel wdt
2247 return err; 2249 * - GPU rail-gating
2250 * - buffer refcounting
2251 *
2252 * If none of the conditions are met, then job tracking is not
2253 * required and a fast submit can be done (ie. only need to write
2254 * out userspace GPFIFO entries and update GP_PUT).
2255 */
2256 need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
2257 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
2258 c->wdt_enabled ||
2259 platform->can_railgate ||
2260 !skip_buffer_refcounting;
2261
2262 if (need_job_tracking) {
2263 /* gk20a_channel_update releases this ref. */
2264 err = gk20a_busy(g->dev);
2265 if (err) {
2266 gk20a_err(d, "failed to host gk20a to submit gpfifo");
2267 return err;
2268 }
2248 } 2269 }
2249 2270
2250 trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev), 2271 trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
@@ -2274,12 +2295,13 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2274 goto clean_up; 2295 goto clean_up;
2275 } 2296 }
2276 2297
2277 2298 if (need_job_tracking) {
2278 err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, 2299 err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd,
2279 &pre_fence, &post_fence, 2300 &pre_fence, &post_fence,
2280 force_need_sync_fence, flags); 2301 force_need_sync_fence, flags);
2281 if (err) 2302 if (err)
2282 goto clean_up; 2303 goto clean_up;
2304 }
2283 2305
2284 if (wait_cmd) 2306 if (wait_cmd)
2285 gk20a_submit_append_priv_cmdbuf(c, wait_cmd); 2307 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
@@ -2306,8 +2328,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2306 *fence_out = gk20a_fence_get(post_fence); 2328 *fence_out = gk20a_fence_get(post_fence);
2307 mutex_unlock(&c->last_submit.fence_lock); 2329 mutex_unlock(&c->last_submit.fence_lock);
2308 2330
2309 /* TODO! Check for errors... */ 2331 if (need_job_tracking)
2310 gk20a_channel_add_job(c, pre_fence, post_fence, 2332 /* TODO! Check for errors... */
2333 gk20a_channel_add_job(c, pre_fence, post_fence,
2311 wait_cmd, incr_cmd, 2334 wait_cmd, incr_cmd,
2312 skip_buffer_refcounting); 2335 skip_buffer_refcounting);
2313 2336
@@ -2317,11 +2340,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2317 c->gpfifo.put); 2340 c->gpfifo.put);
2318 2341
2319 trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev), 2342 trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
2320 c->hw_chid, 2343 c->hw_chid,
2321 num_entries, 2344 num_entries,
2322 flags, 2345 flags,
2323 post_fence->syncpt_id, 2346 post_fence ? post_fence->syncpt_id : 0,
2324 post_fence->syncpt_value); 2347 post_fence ? post_fence->syncpt_value : 0);
2325 2348
2326 gk20a_dbg_info("post-submit put %d, get %d, size %d", 2349 gk20a_dbg_info("post-submit put %d, get %d, size %d",
2327 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); 2350 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
@@ -2335,7 +2358,8 @@ clean_up:
2335 free_priv_cmdbuf(c, incr_cmd); 2358 free_priv_cmdbuf(c, incr_cmd);
2336 gk20a_fence_put(pre_fence); 2359 gk20a_fence_put(pre_fence);
2337 gk20a_fence_put(post_fence); 2360 gk20a_fence_put(post_fence);
2338 gk20a_idle(g->dev); 2361 if (need_job_tracking)
2362 gk20a_idle(g->dev);
2339 return err; 2363 return err;
2340} 2364}
2341 2365