path: root/drivers/gpu/nvgpu/common/linux/channel.c
author	Deepak Nibade <dnibade@nvidia.com>	2017-10-26 11:29:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-11-02 08:09:59 -0400
commit	23c7903eff6ee1ab184dfcc62c054de1557e5b1d (patch)
tree	a5122028e181e5c6009f9f8b66bfbf00f69a9290 /drivers/gpu/nvgpu/common/linux/channel.c
parent	5f8cfaa250f08499f587da0097f6accaa5eedf15 (diff)
gpu: nvgpu: move submit path to linux
Nvgpu's submit path has a lot of dependencies on the Linux framework, e.g. use of copy_from_user(), use of structures defined in the uapi/nvgpu headers, dma_buf_*() calls for trace support, etc. Hence, to keep common code independent of Linux code, move the submit path to the Linux directory.

Move the below APIs to common/linux/channel.c:
  trace_write_pushbuffer()
  trace_write_pushbuffer_range()
  gk20a_submit_prepare_syncs()
  gk20a_submit_append_priv_cmdbuf()
  gk20a_submit_append_gpfifo()
  gk20a_submit_channel_gpfifo()

Move the below API to common/linux/ce2.c:
  gk20a_ce_execute_ops()

Define gk20a_ce_execute_ops() in common/linux/ce2.c and declare it in gk20a/ce2_gk20a.h, since it is needed in common/mm code too. Each OS needs to implement this API separately.

gk20a_channel_alloc_gpfifo() uses sizeof(struct nvgpu_gpfifo) to get the size of one gpfifo entry, but struct nvgpu_gpfifo is Linux-specific. Define a new nvgpu_get_gpfifo_entry_size() in Linux-specific code and use it in gk20a_channel_alloc_gpfifo() to get the gpfifo entry size. Each OS needs to implement this API separately.

Export some APIs from gk20a/ce2_gk20a.h and gk20a/channel_gk20a.h that are needed in Linux code.

Jira NVGPU-259
Jira NVGPU-313

Change-Id: I360c6cb8ce4494b1e50c66af334a2a379f0d2dc4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586277
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
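As a side note on the nvgpu_get_gpfifo_entry_size() hook described above (its Linux implementation appears at the top of the new channel.c below): it lets common code size gpfifo allocations without referencing the Linux-only struct nvgpu_gpfifo. The following is a minimal, hypothetical sketch of the intended call pattern; example_ring_bytes() is not part of this patch, and the u32 type is assumed from the driver's nvgpu/types.h:

/* Per-OS hook: returns the size in bytes of one gpfifo entry. */
u32 nvgpu_get_gpfifo_entry_size(void);

/*
 * Hypothetical helper showing how OS-agnostic code could compute the size
 * of a gpfifo ring without touching the Linux-specific struct nvgpu_gpfifo.
 */
static u32 example_ring_bytes(u32 num_entries)
{
	return num_entries * nvgpu_get_gpfifo_entry_size();
}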
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/channel.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/channel.c	648
1 file changed, 648 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
new file mode 100644
index 00000000..716c5820
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -0,0 +1,648 @@
1/*
2 * Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h>
19#include <nvgpu/ltc.h>
20
21/*
22 * This is required for nvgpu_vm_find_buf() which is used in the tracing
23 * code. Once we can get and access userspace buffers without requiring
24 * direct dma_buf usage this can be removed.
25 */
26#include <nvgpu/linux/vm.h>
27
28#include "gk20a/gk20a.h"
29
30#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
31
32#include <linux/uaccess.h>
33#include <linux/dma-buf.h>
34#include <trace/events/gk20a.h>
35
36u32 nvgpu_get_gpfifo_entry_size(void)
37{
38 return sizeof(struct nvgpu_gpfifo);
39}
40
41#ifdef CONFIG_DEBUG_FS
42static void trace_write_pushbuffer(struct channel_gk20a *c,
43 struct nvgpu_gpfifo *g)
44{
45 void *mem = NULL;
46 unsigned int words;
47 u64 offset;
48 struct dma_buf *dmabuf = NULL;
49
50 if (gk20a_debug_trace_cmdbuf) {
51 u64 gpu_va = (u64)g->entry0 |
52 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
53 int err;
54
55 words = pbdma_gp_entry1_length_v(g->entry1);
56 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
57 if (!err)
58 mem = dma_buf_vmap(dmabuf);
59 }
60
61 if (mem) {
62 u32 i;
63 /*
64 * Write in batches of 128 as there seems to be a limit
65 * of how much you can output to ftrace at once.
66 */
67 for (i = 0; i < words; i += 128U) {
68 trace_gk20a_push_cmdbuf(
69 c->g->name,
70 0,
71 min(words - i, 128U),
72 offset + i * sizeof(u32),
73 mem);
74 }
75 dma_buf_vunmap(dmabuf, mem);
76 }
77}
78#endif
79
80static void trace_write_pushbuffer_range(struct channel_gk20a *c,
81 struct nvgpu_gpfifo *g,
82 struct nvgpu_gpfifo __user *user_gpfifo,
83 int offset,
84 int count)
85{
86#ifdef CONFIG_DEBUG_FS
87 u32 size;
88 int i;
89 struct nvgpu_gpfifo *gp;
90 bool gpfifo_allocated = false;
91
92 if (!gk20a_debug_trace_cmdbuf)
93 return;
94
95 if (!g && !user_gpfifo)
96 return;
97
98 if (!g) {
99 size = count * sizeof(struct nvgpu_gpfifo);
100 if (size) {
101 g = nvgpu_big_malloc(c->g, size);
102 if (!g)
103 return;
104
105 if (copy_from_user(g, user_gpfifo, size)) {
106 nvgpu_big_free(c->g, g);
107 return;
108 }
109 }
110 gpfifo_allocated = true;
111 }
112
113 gp = g + offset;
114 for (i = 0; i < count; i++, gp++)
115 trace_write_pushbuffer(c, gp);
116
117 if (gpfifo_allocated)
118 nvgpu_big_free(c->g, g);
119#endif
120}
121
122/*
123 * Handle the submit synchronization - pre-fences and post-fences.
124 */
125static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
126 struct nvgpu_fence *fence,
127 struct channel_gk20a_job *job,
128 struct priv_cmd_entry **wait_cmd,
129 struct priv_cmd_entry **incr_cmd,
130 struct gk20a_fence **pre_fence,
131 struct gk20a_fence **post_fence,
132 bool force_need_sync_fence,
133 bool register_irq,
134 u32 flags)
135{
136 struct gk20a *g = c->g;
137 bool need_sync_fence = false;
138 bool new_sync_created = false;
139 int wait_fence_fd = -1;
140 int err = 0;
141 bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
142 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
143
144 /*
145 * If user wants to always allocate sync_fence_fds then respect that;
146 * otherwise, allocate sync_fence_fd based on user flags.
147 */
148 if (force_need_sync_fence)
149 need_sync_fence = true;
150
151 if (g->aggressive_sync_destroy_thresh) {
152 nvgpu_mutex_acquire(&c->sync_lock);
153 if (!c->sync) {
154 c->sync = gk20a_channel_sync_create(c);
155 if (!c->sync) {
156 err = -ENOMEM;
157 nvgpu_mutex_release(&c->sync_lock);
158 goto fail;
159 }
160 new_sync_created = true;
161 }
162 nvgpu_atomic_inc(&c->sync->refcount);
163 nvgpu_mutex_release(&c->sync_lock);
164 }
165
166 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
167 err = g->ops.fifo.resetup_ramfc(c);
168 if (err)
169 goto fail;
170 }
171
172 /*
173 * Optionally insert syncpt wait in the beginning of gpfifo submission
174 * when user requested and the wait hasn't expired. Validate that the id
175 * makes sense, elide if not. The only reason this isn't being
176 * unceremoniously killed is to keep running some tests which trigger
177 * this condition.
178 */
179 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
180 job->pre_fence = gk20a_alloc_fence(c);
181 if (!job->pre_fence) {
182 err = -ENOMEM;
183 goto fail;
184 }
185
186 if (!pre_alloc_enabled)
187 job->wait_cmd = nvgpu_kzalloc(g,
188 sizeof(struct priv_cmd_entry));
189
190 if (!job->wait_cmd) {
191 err = -ENOMEM;
192 goto clean_up_pre_fence;
193 }
194
195 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
196 wait_fence_fd = fence->id;
197 err = c->sync->wait_fd(c->sync, wait_fence_fd,
198 job->wait_cmd, job->pre_fence);
199 } else {
200 err = c->sync->wait_syncpt(c->sync, fence->id,
201 fence->value, job->wait_cmd,
202 job->pre_fence);
203 }
204
205 if (!err) {
206 if (job->wait_cmd->valid)
207 *wait_cmd = job->wait_cmd;
208 *pre_fence = job->pre_fence;
209 } else
210 goto clean_up_wait_cmd;
211 }
212
213 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
214 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
215 need_sync_fence = true;
216
217 /*
218 * Always generate an increment at the end of a GPFIFO submission. This
219 * is used to keep track of method completion for idle railgating. The
220 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
221 */
222 job->post_fence = gk20a_alloc_fence(c);
223 if (!job->post_fence) {
224 err = -ENOMEM;
225 goto clean_up_wait_cmd;
226 }
227 if (!pre_alloc_enabled)
228 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
229
230 if (!job->incr_cmd) {
231 err = -ENOMEM;
232 goto clean_up_post_fence;
233 }
234
235 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
236 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
237 job->post_fence, need_wfi, need_sync_fence,
238 register_irq);
239 else
240 err = c->sync->incr(c->sync, job->incr_cmd,
241 job->post_fence, need_sync_fence,
242 register_irq);
243 if (!err) {
244 *incr_cmd = job->incr_cmd;
245 *post_fence = job->post_fence;
246 } else
247 goto clean_up_incr_cmd;
248
249 return 0;
250
251clean_up_incr_cmd:
252 free_priv_cmdbuf(c, job->incr_cmd);
253 if (!pre_alloc_enabled)
254 job->incr_cmd = NULL;
255clean_up_post_fence:
256 gk20a_fence_put(job->post_fence);
257 job->post_fence = NULL;
258clean_up_wait_cmd:
259 free_priv_cmdbuf(c, job->wait_cmd);
260 if (!pre_alloc_enabled)
261 job->wait_cmd = NULL;
262clean_up_pre_fence:
263 gk20a_fence_put(job->pre_fence);
264 job->pre_fence = NULL;
265fail:
266 *wait_cmd = NULL;
267 *pre_fence = NULL;
268 return err;
269}
270
271static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
272 struct priv_cmd_entry *cmd)
273{
274 struct gk20a *g = c->g;
275 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
276 struct nvgpu_gpfifo x = {
277 .entry0 = u64_lo32(cmd->gva),
278 .entry1 = u64_hi32(cmd->gva) |
279 pbdma_gp_entry1_length_f(cmd->size)
280 };
281
282 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
283 &x, sizeof(x));
284
285 if (cmd->mem->aperture == APERTURE_SYSMEM)
286 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
287 cmd->mem->cpu_va + cmd->off * sizeof(u32));
288
289 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
290}
291
292/*
293 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
294 * splitting into two memcpys to handle wrap-around.
295 */
296static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
297 struct nvgpu_gpfifo *kern_gpfifo,
298 struct nvgpu_gpfifo __user *user_gpfifo,
299 u32 num_entries)
300{
301 /* byte offsets */
302 u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo);
303 u32 len = num_entries * sizeof(struct nvgpu_gpfifo);
304 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo);
305 u32 end = start + len; /* exclusive */
306 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
307 struct nvgpu_gpfifo *cpu_src;
308 int err;
309
310 if (user_gpfifo && !c->gpfifo.pipe) {
311 /*
312 * This path (from userspace to sysmem) is special in order to
313 * avoid two copies unnecessarily (from user to pipe, then from
314 * pipe to gpu sysmem buffer).
315 *
316 * As a special case, the pipe buffer exists if PRAMIN writes
317 * are forced, although the buffers may not be in vidmem in
318 * that case.
319 */
320 if (end > gpfifo_size) {
321 /* wrap-around */
322 int length0 = gpfifo_size - start;
323 int length1 = len - length0;
324 void __user *user2 = (u8 __user *)user_gpfifo + length0;
325
326 err = copy_from_user(gpfifo_mem->cpu_va + start,
327 user_gpfifo, length0);
328 if (err)
329 return err;
330
331 err = copy_from_user(gpfifo_mem->cpu_va,
332 user2, length1);
333 if (err)
334 return err;
335 } else {
336 err = copy_from_user(gpfifo_mem->cpu_va + start,
337 user_gpfifo, len);
338 if (err)
339 return err;
340 }
341
342 trace_write_pushbuffer_range(c, NULL, user_gpfifo,
343 0, num_entries);
344 goto out;
345 } else if (user_gpfifo) {
346 /* from userspace to vidmem or sysmem when pramin forced, use
347 * the common copy path below */
348 err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
349 if (err)
350 return err;
351
352 cpu_src = c->gpfifo.pipe;
353 } else {
354 /* from kernel to either sysmem or vidmem, don't need
355 * copy_from_user so use the common path below */
356 cpu_src = kern_gpfifo;
357 }
358
359 if (end > gpfifo_size) {
360 /* wrap-around */
361 int length0 = gpfifo_size - start;
362 int length1 = len - length0;
363 void *src2 = (u8 *)cpu_src + length0;
364
365 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
366 nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
367 } else {
368 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
369
370 }
371
372 trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
373
374out:
375 c->gpfifo.put = (c->gpfifo.put + num_entries) &
376 (c->gpfifo.entry_num - 1);
377
378 return 0;
379}
380
381int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
382 struct nvgpu_gpfifo *gpfifo,
383 struct nvgpu_submit_gpfifo_args *args,
384 u32 num_entries,
385 u32 flags,
386 struct nvgpu_fence *fence,
387 struct gk20a_fence **fence_out,
388 bool force_need_sync_fence,
389 struct fifo_profile_gk20a *profile)
390{
391 struct gk20a *g = c->g;
392 struct priv_cmd_entry *wait_cmd = NULL;
393 struct priv_cmd_entry *incr_cmd = NULL;
394 struct gk20a_fence *pre_fence = NULL;
395 struct gk20a_fence *post_fence = NULL;
396 struct channel_gk20a_job *job = NULL;
397 /* we might need two extra gpfifo entries - one for pre fence
398 * and one for post fence. */
399 const int extra_entries = 2;
400 bool skip_buffer_refcounting = (flags &
401 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
402 int err = 0;
403 bool need_job_tracking;
404 bool need_deferred_cleanup = false;
405 struct nvgpu_gpfifo __user *user_gpfifo = args ?
406 (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
407
408 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
409 return -ENODEV;
410
411 if (c->has_timedout)
412 return -ETIMEDOUT;
413
414 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
415 return -ENOMEM;
416
417 /* fifo not large enough for request. Return error immediately.
418 * Kernel can insert gpfifo entries before and after user gpfifos.
419 * So, add extra_entries in user request. Also, HW with fifo size N
420	 * can accept only N-1 entries, hence the check below */
421 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
422 nvgpu_err(g, "not enough gpfifo space allocated");
423 return -ENOMEM;
424 }
425
426 if (!gpfifo && !args)
427 return -EINVAL;
428
429 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
430 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
431 !fence)
432 return -EINVAL;
433
434 /* an address space needs to have been bound at this point. */
435 if (!gk20a_channel_as_bound(c)) {
436 nvgpu_err(g,
437 "not bound to an address space at time of gpfifo"
438 " submission.");
439 return -EINVAL;
440 }
441
442 if (profile)
443 profile->timestamp[PROFILE_ENTRY] = sched_clock();
444
445 /* update debug settings */
446 nvgpu_ltc_sync_enabled(g);
447
448 gk20a_dbg_info("channel %d", c->chid);
449
450 /*
451 * Job tracking is necessary for any of the following conditions:
452 * - pre- or post-fence functionality
453 * - channel wdt
454 * - GPU rail-gating with non-deterministic channels
455 * - buffer refcounting
456 *
457 * If none of the conditions are met, then job tracking is not
458 * required and a fast submit can be done (ie. only need to write
459 * out userspace GPFIFO entries and update GP_PUT).
460 */
461 need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
462 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
463 c->wdt_enabled ||
464 (g->can_railgate && !c->deterministic) ||
465 !skip_buffer_refcounting;
466
467 if (need_job_tracking) {
468 bool need_sync_framework = false;
469
470 /*
471 * If the channel is to have deterministic latency and
472 * job tracking is required, the channel must have
473 * pre-allocated resources. Otherwise, we fail the submit here
474 */
475 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
476 return -EINVAL;
477
478 need_sync_framework = force_need_sync_fence ||
479 gk20a_channel_sync_needs_sync_framework(g) ||
480 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
481 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
482 flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
483
484 /*
485 * Deferred clean-up is necessary for any of the following
486 * conditions:
487 * - channel's deterministic flag is not set
488 * - dependency on sync framework, which could make the
489 * behavior of the clean-up operation non-deterministic
490 * (should not be performed in the submit path)
491 * - channel wdt
492 * - GPU rail-gating with non-deterministic channels
493 * - buffer refcounting
494 *
495 * If none of the conditions are met, then deferred clean-up
496 * is not required, and we clean-up one job-tracking
497 * resource in the submit path.
498 */
499 need_deferred_cleanup = !c->deterministic ||
500 need_sync_framework ||
501 c->wdt_enabled ||
502 (g->can_railgate &&
503 !c->deterministic) ||
504 !skip_buffer_refcounting;
505
506 /*
507 * For deterministic channels, we don't allow deferred clean_up
508 * processing to occur. In cases we hit this, we fail the submit
509 */
510 if (c->deterministic && need_deferred_cleanup)
511 return -EINVAL;
512
513 if (!c->deterministic) {
514 /*
515 * Get a power ref unless this is a deterministic
516 * channel that holds them during the channel lifetime.
517 * This one is released by gk20a_channel_clean_up_jobs,
518 * via syncpt or sema interrupt, whichever is used.
519 */
520 err = gk20a_busy(g);
521 if (err) {
522 nvgpu_err(g,
523 "failed to host gk20a to submit gpfifo, process %s",
524 current->comm);
525 return err;
526 }
527 }
528
529 if (!need_deferred_cleanup) {
530 /* clean up a single job */
531 gk20a_channel_clean_up_jobs(c, false);
532 }
533 }
534
535
536 /* Grab access to HW to deal with do_idle */
537 if (c->deterministic)
538 nvgpu_rwsem_down_read(&g->deterministic_busy);
539
540 trace_gk20a_channel_submit_gpfifo(g->name,
541 c->chid,
542 num_entries,
543 flags,
544 fence ? fence->id : 0,
545 fence ? fence->value : 0);
546
547 gk20a_dbg_info("pre-submit put %d, get %d, size %d",
548 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
549
550 /*
551 * Make sure we have enough space for gpfifo entries. Check cached
552 * values first and then read from HW. If no space, return EAGAIN
553	 * and let userspace decide whether to re-try the request or not.
554 */
555 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
556 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
557 err = -EAGAIN;
558 goto clean_up;
559 }
560 }
561
562 if (c->has_timedout) {
563 err = -ETIMEDOUT;
564 goto clean_up;
565 }
566
567 if (need_job_tracking) {
568 err = channel_gk20a_alloc_job(c, &job);
569 if (err)
570 goto clean_up;
571
572 err = gk20a_submit_prepare_syncs(c, fence, job,
573 &wait_cmd, &incr_cmd,
574 &pre_fence, &post_fence,
575 force_need_sync_fence,
576 need_deferred_cleanup,
577 flags);
578 if (err)
579 goto clean_up_job;
580 }
581
582 if (profile)
583 profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
584
585 if (wait_cmd)
586 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
587
588 if (gpfifo || user_gpfifo)
589 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
590 num_entries);
591 if (err)
592 goto clean_up_job;
593
594 /*
595 * And here's where we add the incr_cmd we generated earlier. It should
596 * always run!
597 */
598 if (incr_cmd)
599 gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
600
601 if (fence_out)
602 *fence_out = gk20a_fence_get(post_fence);
603
604 if (need_job_tracking)
605 /* TODO! Check for errors... */
606 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
607 if (profile)
608 profile->timestamp[PROFILE_APPEND] = sched_clock();
609
610 g->ops.fifo.userd_gp_put(g, c);
611
612 if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & flags) &&
613 g->ops.fifo.reschedule_runlist)
614 g->ops.fifo.reschedule_runlist(g, c->runlist_id);
615
616 /* No hw access beyond this point */
617 if (c->deterministic)
618 nvgpu_rwsem_up_read(&g->deterministic_busy);
619
620 trace_gk20a_channel_submitted_gpfifo(g->name,
621 c->chid,
622 num_entries,
623 flags,
624 post_fence ? post_fence->syncpt_id : 0,
625 post_fence ? post_fence->syncpt_value : 0);
626
627 gk20a_dbg_info("post-submit put %d, get %d, size %d",
628 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
629
630 if (profile)
631 profile->timestamp[PROFILE_END] = sched_clock();
632 gk20a_dbg_fn("done");
633 return err;
634
635clean_up_job:
636 channel_gk20a_free_job(c, job);
637clean_up:
638 gk20a_dbg_fn("fail");
639 gk20a_fence_put(pre_fence);
640 gk20a_fence_put(post_fence);
641 if (c->deterministic)
642 nvgpu_rwsem_up_read(&g->deterministic_busy);
643 else if (need_deferred_cleanup)
644 gk20a_idle(g);
645
646 return err;
647}
648
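A closing note on the ring arithmetic in gk20a_submit_append_gpfifo() and gk20a_submit_append_priv_cmdbuf() above: c->gpfifo.put is advanced with (put + n) & (entry_num - 1), which only behaves as a modulo when entry_num is a power of two, and a copy that would run past the end of the ring is split in two. The standalone sketch below illustrates that pattern; the names ring_append, ring, entry_num, put and the 32-bit-word payload are hypothetical, not taken from the driver:

#include <stdint.h>
#include <string.h>

/*
 * Append count 32-bit words to a ring of entry_num slots (entry_num must be
 * a power of two), splitting the copy at the wrap point and advancing put
 * with a mask, mirroring how the submit path updates c->gpfifo.put.
 */
static uint32_t ring_append(uint32_t *ring, uint32_t entry_num, uint32_t put,
			    const uint32_t *src, uint32_t count)
{
	uint32_t space_to_end = entry_num - put;

	if (count > space_to_end) {
		/* wrap-around: copy up to the end, then continue at slot 0 */
		memcpy(&ring[put], src, space_to_end * sizeof(*ring));
		memcpy(&ring[0], src + space_to_end,
		       (count - space_to_end) * sizeof(*ring));
	} else {
		memcpy(&ring[put], src, count * sizeof(*ring));
	}

	/* the mask acts as a cheap modulo because entry_num is a power of two */
	return (put + count) & (entry_num - 1);
}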