author		Konsta Holtta <kholtta@nvidia.com>	2018-06-25 05:35:42 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-06-27 21:40:16 -0400
commit		7998233b77a343d002b699d5f348bbeb243e16f5 (patch)
tree		aa24afcc414be8fbccf6991804f69946e2b72525 /drivers
parent		2ac6fb4253fa815ed17f09a01141b938c826dac9 (diff)
gpu: nvgpu: move submit code to common
To finish OS unification of the submit path, move the
gk20a_submit_channel_gpfifo* functions to a file that's accessible also
outside Linux code. Also change the prefix of the submit functions from
gk20a_ to nvgpu_.

Jira NVGPU-705

Change-Id: I8ca355d1eb69771fb016c7a21fc7f102ca7967d7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760421
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
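For reference, the caller-side effect of the rename is a pure name change
with unchanged arguments; a minimal before/after sketch, taken from the
ce2.c hunk further below (not a self-contained program):

	/* Before: Linux-only entry point declared in os/linux/channel.h */
	ret = gk20a_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
			1, submit_flags, &fence, &ce_cmd_buf_fence_out);

	/* After: common entry point declared in <nvgpu/channel.h>;
	 * the argument list is identical. */
	ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
			1, submit_flags, &fence, &ce_cmd_buf_fence_out);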
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/Makefile                  |   1
-rw-r--r--  drivers/gpu/nvgpu/Makefile.sources          |   1
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/submit.c      | 577
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h     |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h             |   1
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/channel.h   |  52
-rw-r--r--  drivers/gpu/nvgpu/os/linux/cde.c            |   3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ce2.c            |   3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.c        | 551
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.h        |  15
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_channel.c  |   5
11 files changed, 642 insertions, 569 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index faf17a91..61636ff5 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -177,6 +177,7 @@ nvgpu-y += \
 	common/clock_gating/gv11b_gating_reglist.o \
 	common/sim.o \
 	common/sim_pci.o \
+	common/fifo/submit.o \
 	gk20a/gk20a.o \
 	gk20a/ce2_gk20a.o \
 	gk20a/fifo_gk20a.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index cad9c1e3..942fddea 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -80,6 +80,7 @@ srcs := common/mm/nvgpu_allocator.c \
 	common/clock_gating/gv11b_gating_reglist.c \
 	common/clock_gating/gp106_gating_reglist.c \
 	common/clock_gating/gv100_gating_reglist.c \
+	common/fifo/submit.c \
 	boardobj/boardobj.c \
 	boardobj/boardobjgrp.c \
 	boardobj/boardobjgrpmask.c \
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
new file mode 100644
index 00000000..daeee608
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -0,0 +1,577 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/channel.h>
24#include <nvgpu/ltc.h>
25#include <nvgpu/os_sched.h>
26
27#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
28
29#include "gk20a/gk20a.h"
30#include "gk20a/channel_gk20a.h"
31
32#include <trace/events/gk20a.h>
33
34/*
35 * Handle the submit synchronization - pre-fences and post-fences.
36 */
37static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
38 struct nvgpu_channel_fence *fence,
39 struct channel_gk20a_job *job,
40 struct priv_cmd_entry **wait_cmd,
41 struct priv_cmd_entry **incr_cmd,
42 struct gk20a_fence **post_fence,
43 bool register_irq,
44 u32 flags)
45{
46 struct gk20a *g = c->g;
47 bool need_sync_fence = false;
48 bool new_sync_created = false;
49 int wait_fence_fd = -1;
50 int err = 0;
51 bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
52 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
53
54 if (g->aggressive_sync_destroy_thresh) {
55 nvgpu_mutex_acquire(&c->sync_lock);
56 if (!c->sync) {
57 c->sync = gk20a_channel_sync_create(c, false);
58 if (!c->sync) {
59 err = -ENOMEM;
60 nvgpu_mutex_release(&c->sync_lock);
61 goto fail;
62 }
63 new_sync_created = true;
64 }
65 nvgpu_atomic_inc(&c->sync->refcount);
66 nvgpu_mutex_release(&c->sync_lock);
67 }
68
69 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
70 err = g->ops.fifo.resetup_ramfc(c);
71 if (err)
72 goto fail;
73 }
74
75 /*
76 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
77 * submission when user requested and the wait hasn't expired.
78 */
79 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
80 int max_wait_cmds = c->deterministic ? 1 : 0;
81
82 if (!pre_alloc_enabled)
83 job->wait_cmd = nvgpu_kzalloc(g,
84 sizeof(struct priv_cmd_entry));
85
86 if (!job->wait_cmd) {
87 err = -ENOMEM;
88 goto fail;
89 }
90
91 if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
92 wait_fence_fd = fence->id;
93 err = c->sync->wait_fd(c->sync, wait_fence_fd,
94 job->wait_cmd, max_wait_cmds);
95 } else {
96 err = c->sync->wait_syncpt(c->sync, fence->id,
97 fence->value,
98 job->wait_cmd);
99 }
100
101 if (err)
102 goto clean_up_wait_cmd;
103
104 if (job->wait_cmd->valid)
105 *wait_cmd = job->wait_cmd;
106 }
107
108 if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
109 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
110 need_sync_fence = true;
111
112 /*
113 * Always generate an increment at the end of a GPFIFO submission. This
114 * is used to keep track of method completion for idle railgating. The
115 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
116 */
117 job->post_fence = gk20a_alloc_fence(c);
118 if (!job->post_fence) {
119 err = -ENOMEM;
120 goto clean_up_wait_cmd;
121 }
122 if (!pre_alloc_enabled)
123 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
124
125 if (!job->incr_cmd) {
126 err = -ENOMEM;
127 goto clean_up_post_fence;
128 }
129
130 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
131 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
132 job->post_fence, need_wfi, need_sync_fence,
133 register_irq);
134 else
135 err = c->sync->incr(c->sync, job->incr_cmd,
136 job->post_fence, need_sync_fence,
137 register_irq);
138 if (!err) {
139 *incr_cmd = job->incr_cmd;
140 *post_fence = job->post_fence;
141 } else
142 goto clean_up_incr_cmd;
143
144 return 0;
145
146clean_up_incr_cmd:
147 free_priv_cmdbuf(c, job->incr_cmd);
148 if (!pre_alloc_enabled)
149 job->incr_cmd = NULL;
150clean_up_post_fence:
151 gk20a_fence_put(job->post_fence);
152 job->post_fence = NULL;
153clean_up_wait_cmd:
154 if (job->wait_cmd)
155 free_priv_cmdbuf(c, job->wait_cmd);
156 if (!pre_alloc_enabled)
157 job->wait_cmd = NULL;
158fail:
159 *wait_cmd = NULL;
160 return err;
161}
162
163static void nvgpu_submit_append_priv_cmdbuf(struct channel_gk20a *c,
164 struct priv_cmd_entry *cmd)
165{
166 struct gk20a *g = c->g;
167 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
168 struct nvgpu_gpfifo_entry x = {
169 .entry0 = u64_lo32(cmd->gva),
170 .entry1 = u64_hi32(cmd->gva) |
171 pbdma_gp_entry1_length_f(cmd->size)
172 };
173
174 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
175 &x, sizeof(x));
176
177 if (cmd->mem->aperture == APERTURE_SYSMEM)
178 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
179 (u32 *)cmd->mem->cpu_va + cmd->off);
180
181 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
182}
183
184static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
185 struct nvgpu_gpfifo_userdata userdata,
186 u32 num_entries)
187{
188 struct gk20a *g = c->g;
189 struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
190 u32 gpfifo_size = c->gpfifo.entry_num;
191 u32 len = num_entries;
192 u32 start = c->gpfifo.put;
193 u32 end = start + len; /* exclusive */
194 int err;
195
196 if (end > gpfifo_size) {
197 /* wrap-around */
198 int length0 = gpfifo_size - start;
199 int length1 = len - length0;
200
201 err = g->os_channel.copy_user_gpfifo(
202 gpfifo_cpu + start, userdata,
203 0, length0);
204 if (err)
205 return err;
206
207 err = g->os_channel.copy_user_gpfifo(
208 gpfifo_cpu, userdata,
209 length0, length1);
210 if (err)
211 return err;
212 } else {
213 err = g->os_channel.copy_user_gpfifo(
214 gpfifo_cpu + start, userdata,
215 0, len);
216 if (err)
217 return err;
218 }
219
220 return 0;
221}
222
223static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
224 struct nvgpu_gpfifo_entry *src, u32 num_entries)
225{
226 struct gk20a *g = c->g;
227 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
228 /* in bytes */
229 u32 gpfifo_size =
230 c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
231 u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
232 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
233 u32 end = start + len; /* exclusive */
234
235 if (end > gpfifo_size) {
236 /* wrap-around */
237 int length0 = gpfifo_size - start;
238 int length1 = len - length0;
239 struct nvgpu_gpfifo_entry *src2 = src + length0;
240
241 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
242 nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
243 } else {
244 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
245 }
246}
247
248/*
249 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
250 * splitting into two memcpys to handle wrap-around.
251 */
252static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
253 struct nvgpu_gpfifo_entry *kern_gpfifo,
254 struct nvgpu_gpfifo_userdata userdata,
255 u32 num_entries)
256{
257 struct gk20a *g = c->g;
258 int err;
259
260 if (!kern_gpfifo && !c->gpfifo.pipe) {
261 /*
262 * This path (from userspace to sysmem) is special in order to
263 * avoid two copies unnecessarily (from user to pipe, then from
264 * pipe to gpu sysmem buffer).
265 */
266 err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
267 num_entries);
268 if (err)
269 return err;
270 } else if (!kern_gpfifo) {
271 /* from userspace to vidmem, use the common path */
272 err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
273 0, num_entries);
274 if (err)
275 return err;
276
277 nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
278 num_entries);
279 } else {
280 /* from kernel to either sysmem or vidmem, don't need
281 * copy_user_gpfifo so use the common path */
282 nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
283 }
284
285 trace_write_pushbuffers(c, num_entries);
286
287 c->gpfifo.put = (c->gpfifo.put + num_entries) &
288 (c->gpfifo.entry_num - 1);
289
290 return 0;
291}
292
293static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
294 struct nvgpu_gpfifo_entry *gpfifo,
295 struct nvgpu_gpfifo_userdata userdata,
296 u32 num_entries,
297 u32 flags,
298 struct nvgpu_channel_fence *fence,
299 struct gk20a_fence **fence_out,
300 struct fifo_profile_gk20a *profile)
301{
302 struct gk20a *g = c->g;
303 struct priv_cmd_entry *wait_cmd = NULL;
304 struct priv_cmd_entry *incr_cmd = NULL;
305 struct gk20a_fence *post_fence = NULL;
306 struct channel_gk20a_job *job = NULL;
307 /* we might need two extra gpfifo entries - one for pre fence
308 * and one for post fence. */
309 const int extra_entries = 2;
310 bool skip_buffer_refcounting = (flags &
311 NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
312 int err = 0;
313 bool need_job_tracking;
314 bool need_deferred_cleanup = false;
315
316 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
317 return -ENODEV;
318
319 if (c->has_timedout)
320 return -ETIMEDOUT;
321
322 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
323 return -ENOMEM;
324
325 /* fifo not large enough for request. Return error immediately.
326 * Kernel can insert gpfifo entries before and after user gpfifos.
327 * So, add extra_entries in user request. Also, HW with fifo size N
328 * can accept only N-1 entries, hence the condition below. */
329 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
330 nvgpu_err(g, "not enough gpfifo space allocated");
331 return -ENOMEM;
332 }
333
334 if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
335 NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
336 !fence)
337 return -EINVAL;
338
339 /* an address space needs to have been bound at this point. */
340 if (!gk20a_channel_as_bound(c)) {
341 nvgpu_err(g,
342 "not bound to an address space at time of gpfifo"
343 " submission.");
344 return -EINVAL;
345 }
346
347 gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);
348
349 /* update debug settings */
350 nvgpu_ltc_sync_enabled(g);
351
352 nvgpu_log_info(g, "channel %d", c->chid);
353
354 /*
355 * Job tracking is necessary for any of the following conditions:
356 * - pre- or post-fence functionality
357 * - channel wdt
358 * - GPU rail-gating with non-deterministic channels
359 * - buffer refcounting
360 *
361 * If none of the conditions are met, then job tracking is not
362 * required and a fast submit can be done (ie. only need to write
363 * out userspace GPFIFO entries and update GP_PUT).
364 */
365 need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
366 (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
367 c->timeout.enabled ||
368 (g->can_railgate && !c->deterministic) ||
369 !skip_buffer_refcounting;
370
371 if (need_job_tracking) {
372 bool need_sync_framework = false;
373
374 /*
375 * If the channel is to have deterministic latency and
376 * job tracking is required, the channel must have
377 * pre-allocated resources. Otherwise, we fail the submit here
378 */
379 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
380 return -EINVAL;
381
382 need_sync_framework =
383 gk20a_channel_sync_needs_sync_framework(g) ||
384 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
385 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);
386
387 /*
388 * Deferred clean-up is necessary for any of the following
389 * conditions:
390 * - channel's deterministic flag is not set
391 * - dependency on sync framework, which could make the
392 * behavior of the clean-up operation non-deterministic
393 * (should not be performed in the submit path)
394 * - channel wdt
395 * - GPU rail-gating with non-deterministic channels
396 * - buffer refcounting
397 *
398 * If none of the conditions are met, then deferred clean-up
399 * is not required, and we clean-up one job-tracking
400 * resource in the submit path.
401 */
402 need_deferred_cleanup = !c->deterministic ||
403 need_sync_framework ||
404 c->timeout.enabled ||
405 (g->can_railgate &&
406 !c->deterministic) ||
407 !skip_buffer_refcounting;
408
409 /*
410 * For deterministic channels, we don't allow deferred clean_up
411 * processing to occur. In cases we hit this, we fail the submit
412 */
413 if (c->deterministic && need_deferred_cleanup)
414 return -EINVAL;
415
416 if (!c->deterministic) {
417 /*
418 * Get a power ref unless this is a deterministic
419 * channel that holds them during the channel lifetime.
420 * This one is released by gk20a_channel_clean_up_jobs,
421 * via syncpt or sema interrupt, whichever is used.
422 */
423 err = gk20a_busy(g);
424 if (err) {
425 nvgpu_err(g,
426 "failed to host gk20a to submit gpfifo");
427 nvgpu_print_current(g, NULL, NVGPU_ERROR);
428 return err;
429 }
430 }
431
432 if (!need_deferred_cleanup) {
433 /* clean up a single job */
434 gk20a_channel_clean_up_jobs(c, false);
435 }
436 }
437
438
439 /* Grab access to HW to deal with do_idle */
440 if (c->deterministic)
441 nvgpu_rwsem_down_read(&g->deterministic_busy);
442
443 if (c->deterministic && c->deterministic_railgate_allowed) {
444 /*
445 * Nope - this channel has dropped its own power ref. As
446 * deterministic submits don't hold power on per each submitted
447 * job like normal ones do, the GPU might railgate any time now
448 * and thus submit is disallowed.
449 */
450 err = -EINVAL;
451 goto clean_up;
452 }
453
454 trace_gk20a_channel_submit_gpfifo(g->name,
455 c->chid,
456 num_entries,
457 flags,
458 fence ? fence->id : 0,
459 fence ? fence->value : 0);
460
461 nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
462 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
463
464 /*
465 * Make sure we have enough space for gpfifo entries. Check cached
466 * values first and then read from HW. If no space, return EAGAIN
467 * and let userspace decide whether to retry the request.
468 */
469 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
470 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
471 err = -EAGAIN;
472 goto clean_up;
473 }
474 }
475
476 if (c->has_timedout) {
477 err = -ETIMEDOUT;
478 goto clean_up;
479 }
480
481 if (need_job_tracking) {
482 err = channel_gk20a_alloc_job(c, &job);
483 if (err)
484 goto clean_up;
485
486 err = nvgpu_submit_prepare_syncs(c, fence, job,
487 &wait_cmd, &incr_cmd,
488 &post_fence,
489 need_deferred_cleanup,
490 flags);
491 if (err)
492 goto clean_up_job;
493 }
494
495 gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
496
497 if (wait_cmd)
498 nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);
499
500 err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
501 num_entries);
502 if (err)
503 goto clean_up_job;
504
505 /*
506 * And here's where we add the incr_cmd we generated earlier. It should
507 * always run!
508 */
509 if (incr_cmd)
510 nvgpu_submit_append_priv_cmdbuf(c, incr_cmd);
511
512 if (fence_out)
513 *fence_out = gk20a_fence_get(post_fence);
514
515 if (need_job_tracking)
516 /* TODO! Check for errors... */
517 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
518 gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
519
520 g->ops.fifo.userd_gp_put(g, c);
521
522 /* No hw access beyond this point */
523 if (c->deterministic)
524 nvgpu_rwsem_up_read(&g->deterministic_busy);
525
526 trace_gk20a_channel_submitted_gpfifo(g->name,
527 c->chid,
528 num_entries,
529 flags,
530 post_fence ? post_fence->syncpt_id : 0,
531 post_fence ? post_fence->syncpt_value : 0);
532
533 nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
534 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
535
536 gk20a_fifo_profile_snapshot(profile, PROFILE_END);
537
538 nvgpu_log_fn(g, "done");
539 return err;
540
541clean_up_job:
542 channel_gk20a_free_job(c, job);
543clean_up:
544 nvgpu_log_fn(g, "fail");
545 gk20a_fence_put(post_fence);
546 if (c->deterministic)
547 nvgpu_rwsem_up_read(&g->deterministic_busy);
548 else if (need_deferred_cleanup)
549 gk20a_idle(g);
550
551 return err;
552}
553
554int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
555 struct nvgpu_gpfifo_userdata userdata,
556 u32 num_entries,
557 u32 flags,
558 struct nvgpu_channel_fence *fence,
559 struct gk20a_fence **fence_out,
560 struct fifo_profile_gk20a *profile)
561{
562 return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
563 flags, fence, fence_out, profile);
564}
565
566int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
567 struct nvgpu_gpfifo_entry *gpfifo,
568 u32 num_entries,
569 u32 flags,
570 struct nvgpu_channel_fence *fence,
571 struct gk20a_fence **fence_out)
572{
573 struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
574
575 return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
576 flags, fence, fence_out, NULL);
577}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index aa37db62..78325019 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -47,7 +47,7 @@ struct fifo_profile_gk20a;
 #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2)
 #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1 << 3)

-/* Flags to be passed to gk20a_submit_channel_gpfifo() */
+/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
 #define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1 << 0)
 #define NVGPU_SUBMIT_FLAGS_FENCE_GET (1 << 1)
 #define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1 << 2)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9061236e..3c25f8fb 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -150,6 +150,7 @@ enum nvgpu_unit;
 enum nvgpu_flush_op;

 struct _resmgr_context;
+struct nvgpu_gpfifo_entry;

 struct nvgpu_gpfifo_userdata {
 	struct nvgpu_gpfifo_entry __user *entries;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
new file mode 100644
index 00000000..604083d4
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/types.h>
24
25#include "gk20a/gk20a.h"
26
27struct nvgpu_channel_fence;
28struct gk20a_fence;
29struct fifo_profile_gk20a;
30
31int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
32 struct nvgpu_gpfifo_userdata userdata,
33 u32 num_entries,
34 u32 flags,
35 struct nvgpu_channel_fence *fence,
36 struct gk20a_fence **fence_out,
37 struct fifo_profile_gk20a *profile);
38
39int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
40 struct nvgpu_gpfifo_entry *gpfifo,
41 u32 num_entries,
42 u32 flags,
43 struct nvgpu_channel_fence *fence,
44 struct gk20a_fence **fence_out);
45
46#ifdef CONFIG_DEBUG_FS
47void trace_write_pushbuffers(struct channel_gk20a *c, int count);
48#else
49static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count)
50{
51}
52#endif
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 052a1d21..39b7d1f5 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -32,6 +32,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/firmware.h>
 #include <nvgpu/os_sched.h>
+#include <nvgpu/channel.h>

 #include <nvgpu/linux/vm.h>

@@ -783,7 +784,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
 		return -ENOSYS;
 	}

-	return gk20a_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
+	return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
 			num_entries, flags, fence, fence_out);
 }

diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c
index 8f20091b..0b43c0d1 100644
--- a/drivers/gpu/nvgpu/os/linux/ce2.c
+++ b/drivers/gpu/nvgpu/os/linux/ce2.c
@@ -15,6 +15,7 @@
  */

 #include <nvgpu/types.h>
+#include <nvgpu/channel.h>

 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

@@ -130,7 +131,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,

 	nvgpu_smp_wmb();

-	ret = gk20a_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
+	ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
 			1, submit_flags, &fence, &ce_cmd_buf_fence_out);

 	if (!ret) {
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index 391950af..fef44f2b 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -16,7 +16,6 @@

 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
-#include <nvgpu/ltc.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/os_sched.h>

@@ -489,11 +488,9 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 		dma_buf_vunmap(dmabuf, mem);
 	}
 }
-#endif

-static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
+void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
-#ifdef CONFIG_DEBUG_FS
 	struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
 	u32 n = c->gpfifo.entry_num;
 	u32 start = c->gpfifo.put;
@@ -507,549 +504,5 @@ static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)

 	for (i = 0; i < count; i++)
 		trace_write_pushbuffer(c, &gp[(start + i) % n]);
-#endif
-}
-
513/*
514 * Handle the submit synchronization - pre-fences and post-fences.
515 */
516static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
517 struct nvgpu_channel_fence *fence,
518 struct channel_gk20a_job *job,
519 struct priv_cmd_entry **wait_cmd,
520 struct priv_cmd_entry **incr_cmd,
521 struct gk20a_fence **post_fence,
522 bool register_irq,
523 u32 flags)
524{
525 struct gk20a *g = c->g;
526 bool need_sync_fence = false;
527 bool new_sync_created = false;
528 int wait_fence_fd = -1;
529 int err = 0;
530 bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
531 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
532
533 if (g->aggressive_sync_destroy_thresh) {
534 nvgpu_mutex_acquire(&c->sync_lock);
535 if (!c->sync) {
536 c->sync = gk20a_channel_sync_create(c, false);
537 if (!c->sync) {
538 err = -ENOMEM;
539 nvgpu_mutex_release(&c->sync_lock);
540 goto fail;
541 }
542 new_sync_created = true;
543 }
544 nvgpu_atomic_inc(&c->sync->refcount);
545 nvgpu_mutex_release(&c->sync_lock);
546 }
547
548 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
549 err = g->ops.fifo.resetup_ramfc(c);
550 if (err)
551 goto fail;
552 }
553
554 /*
555 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
556 * submission when user requested and the wait hasn't expired.
557 */
558 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
559 int max_wait_cmds = c->deterministic ? 1 : 0;
560
561 if (!pre_alloc_enabled)
562 job->wait_cmd = nvgpu_kzalloc(g,
563 sizeof(struct priv_cmd_entry));
564
565 if (!job->wait_cmd) {
566 err = -ENOMEM;
567 goto fail;
568 }
569
570 if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
571 wait_fence_fd = fence->id;
572 err = c->sync->wait_fd(c->sync, wait_fence_fd,
573 job->wait_cmd, max_wait_cmds);
574 } else {
575 err = c->sync->wait_syncpt(c->sync, fence->id,
576 fence->value,
577 job->wait_cmd);
578 }
579
580 if (err)
581 goto clean_up_wait_cmd;
582
583 if (job->wait_cmd->valid)
584 *wait_cmd = job->wait_cmd;
585 }
586
587 if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
588 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
589 need_sync_fence = true;
590
591 /*
592 * Always generate an increment at the end of a GPFIFO submission. This
593 * is used to keep track of method completion for idle railgating. The
594 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
595 */
596 job->post_fence = gk20a_alloc_fence(c);
597 if (!job->post_fence) {
598 err = -ENOMEM;
599 goto clean_up_wait_cmd;
600 }
601 if (!pre_alloc_enabled)
602 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
603
604 if (!job->incr_cmd) {
605 err = -ENOMEM;
606 goto clean_up_post_fence;
607 }
608
609 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
610 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
611 job->post_fence, need_wfi, need_sync_fence,
612 register_irq);
613 else
614 err = c->sync->incr(c->sync, job->incr_cmd,
615 job->post_fence, need_sync_fence,
616 register_irq);
617 if (!err) {
618 *incr_cmd = job->incr_cmd;
619 *post_fence = job->post_fence;
620 } else
621 goto clean_up_incr_cmd;
622
623 return 0;
624
625clean_up_incr_cmd:
626 free_priv_cmdbuf(c, job->incr_cmd);
627 if (!pre_alloc_enabled)
628 job->incr_cmd = NULL;
629clean_up_post_fence:
630 gk20a_fence_put(job->post_fence);
631 job->post_fence = NULL;
632clean_up_wait_cmd:
633 if (job->wait_cmd)
634 free_priv_cmdbuf(c, job->wait_cmd);
635 if (!pre_alloc_enabled)
636 job->wait_cmd = NULL;
637fail:
638 *wait_cmd = NULL;
639 return err;
640}
641
642static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
643 struct priv_cmd_entry *cmd)
644{
645 struct gk20a *g = c->g;
646 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
647 struct nvgpu_gpfifo_entry x = {
648 .entry0 = u64_lo32(cmd->gva),
649 .entry1 = u64_hi32(cmd->gva) |
650 pbdma_gp_entry1_length_f(cmd->size)
651 };
652
653 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
654 &x, sizeof(x));
655
656 if (cmd->mem->aperture == APERTURE_SYSMEM)
657 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
658 (u32 *)cmd->mem->cpu_va + cmd->off);
659
660 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
661}
662
663static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
664 struct nvgpu_gpfifo_userdata userdata,
665 u32 num_entries)
666{
667 struct gk20a *g = c->g;
668 struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
669 u32 gpfifo_size = c->gpfifo.entry_num;
670 u32 len = num_entries;
671 u32 start = c->gpfifo.put;
672 u32 end = start + len; /* exclusive */
673 int err;
674
675 if (end > gpfifo_size) {
676 /* wrap-around */
677 int length0 = gpfifo_size - start;
678 int length1 = len - length0;
679
680 err = g->os_channel.copy_user_gpfifo(
681 gpfifo_cpu + start, userdata,
682 0, length0);
683 if (err)
684 return err;
685
686 err = g->os_channel.copy_user_gpfifo(
687 gpfifo_cpu, userdata,
688 length0, length1);
689 if (err)
690 return err;
691 } else {
692 err = g->os_channel.copy_user_gpfifo(
693 gpfifo_cpu + start, userdata,
694 0, len);
695 if (err)
696 return err;
697 }
698
699 return 0;
700}
701
702static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
703 struct nvgpu_gpfifo_entry *src, u32 num_entries)
704{
705 struct gk20a *g = c->g;
706 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
707 /* in bytes */
708 u32 gpfifo_size =
709 c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
710 u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
711 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
712 u32 end = start + len; /* exclusive */
713
714 if (end > gpfifo_size) {
715 /* wrap-around */
716 int length0 = gpfifo_size - start;
717 int length1 = len - length0;
718 struct nvgpu_gpfifo_entry *src2 = src + length0;
719
720 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
721 nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
722 } else {
723 nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
724 }
725}
726
727/*
728 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
729 * splitting into two memcpys to handle wrap-around.
730 */
731static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
732 struct nvgpu_gpfifo_entry *kern_gpfifo,
733 struct nvgpu_gpfifo_userdata userdata,
734 u32 num_entries)
735{
736 struct gk20a *g = c->g;
737 int err;
738
739 if (!kern_gpfifo && !c->gpfifo.pipe) {
740 /*
741 * This path (from userspace to sysmem) is special in order to
742 * avoid two copies unnecessarily (from user to pipe, then from
743 * pipe to gpu sysmem buffer).
744 */
745 err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
746 num_entries);
747 if (err)
748 return err;
749 } else if (!kern_gpfifo) {
750 /* from userspace to vidmem, use the common path */
751 err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
752 0, num_entries);
753 if (err)
754 return err;
755
756 nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
757 num_entries);
758 } else {
759 /* from kernel to either sysmem or vidmem, don't need
760 * copy_user_gpfifo so use the common path */
761 nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
762 }
763
764 trace_write_pushbuffers(c, num_entries);
765
766 c->gpfifo.put = (c->gpfifo.put + num_entries) &
767 (c->gpfifo.entry_num - 1);
768
769 return 0;
770}
771
772static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
773 struct nvgpu_gpfifo_entry *gpfifo,
774 struct nvgpu_gpfifo_userdata userdata,
775 u32 num_entries,
776 u32 flags,
777 struct nvgpu_channel_fence *fence,
778 struct gk20a_fence **fence_out,
779 struct fifo_profile_gk20a *profile)
780{
781 struct gk20a *g = c->g;
782 struct priv_cmd_entry *wait_cmd = NULL;
783 struct priv_cmd_entry *incr_cmd = NULL;
784 struct gk20a_fence *post_fence = NULL;
785 struct channel_gk20a_job *job = NULL;
786 /* we might need two extra gpfifo entries - one for pre fence
787 * and one for post fence. */
788 const int extra_entries = 2;
789 bool skip_buffer_refcounting = (flags &
790 NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
791 int err = 0;
792 bool need_job_tracking;
793 bool need_deferred_cleanup = false;
794
795 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
796 return -ENODEV;
797
798 if (c->has_timedout)
799 return -ETIMEDOUT;
800
801 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
802 return -ENOMEM;
803
804 /* fifo not large enough for request. Return error immediately.
805 * Kernel can insert gpfifo entries before and after user gpfifos.
806 * So, add extra_entries in user request. Also, HW with fifo size N
807 * can accept only N-1 entries, hence the condition below. */
808 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
809 nvgpu_err(g, "not enough gpfifo space allocated");
810 return -ENOMEM;
811 }
812
813 if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
814 NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
815 !fence)
816 return -EINVAL;
817
818 /* an address space needs to have been bound at this point. */
819 if (!gk20a_channel_as_bound(c)) {
820 nvgpu_err(g,
821 "not bound to an address space at time of gpfifo"
822 " submission.");
823 return -EINVAL;
824 }
825
826 gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);
827
828 /* update debug settings */
829 nvgpu_ltc_sync_enabled(g);
830
831 nvgpu_log_info(g, "channel %d", c->chid);
832
833 /*
834 * Job tracking is necessary for any of the following conditions:
835 * - pre- or post-fence functionality
836 * - channel wdt
837 * - GPU rail-gating with non-deterministic channels
838 * - buffer refcounting
839 *
840 * If none of the conditions are met, then job tracking is not
841 * required and a fast submit can be done (ie. only need to write
842 * out userspace GPFIFO entries and update GP_PUT).
843 */
844 need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
845 (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
846 c->timeout.enabled ||
847 (g->can_railgate && !c->deterministic) ||
848 !skip_buffer_refcounting;
849
850 if (need_job_tracking) {
851 bool need_sync_framework = false;
852
853 /*
854 * If the channel is to have deterministic latency and
855 * job tracking is required, the channel must have
856 * pre-allocated resources. Otherwise, we fail the submit here
857 */
858 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
859 return -EINVAL;
860
861 need_sync_framework =
862 gk20a_channel_sync_needs_sync_framework(g) ||
863 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
864 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);
865
866 /*
867 * Deferred clean-up is necessary for any of the following
868 * conditions:
869 * - channel's deterministic flag is not set
870 * - dependency on sync framework, which could make the
871 * behavior of the clean-up operation non-deterministic
872 * (should not be performed in the submit path)
873 * - channel wdt
874 * - GPU rail-gating with non-deterministic channels
875 * - buffer refcounting
876 *
877 * If none of the conditions are met, then deferred clean-up
878 * is not required, and we clean-up one job-tracking
879 * resource in the submit path.
880 */
881 need_deferred_cleanup = !c->deterministic ||
882 need_sync_framework ||
883 c->timeout.enabled ||
884 (g->can_railgate &&
885 !c->deterministic) ||
886 !skip_buffer_refcounting;
887
888 /*
889 * For deterministic channels, we don't allow deferred clean_up
890 * processing to occur. In cases we hit this, we fail the submit
891 */
892 if (c->deterministic && need_deferred_cleanup)
893 return -EINVAL;
894
895 if (!c->deterministic) {
896 /*
897 * Get a power ref unless this is a deterministic
898 * channel that holds them during the channel lifetime.
899 * This one is released by gk20a_channel_clean_up_jobs,
900 * via syncpt or sema interrupt, whichever is used.
901 */
902 err = gk20a_busy(g);
903 if (err) {
904 nvgpu_err(g,
905 "failed to host gk20a to submit gpfifo");
906 nvgpu_print_current(g, NULL, NVGPU_ERROR);
907 return err;
908 }
909 }
910
911 if (!need_deferred_cleanup) {
912 /* clean up a single job */
913 gk20a_channel_clean_up_jobs(c, false);
914 }
915 }
916
917
918 /* Grab access to HW to deal with do_idle */
919 if (c->deterministic)
920 nvgpu_rwsem_down_read(&g->deterministic_busy);
921
922 if (c->deterministic && c->deterministic_railgate_allowed) {
923 /*
924 * Nope - this channel has dropped its own power ref. As
925 * deterministic submits don't hold power on per each submitted
926 * job like normal ones do, the GPU might railgate any time now
927 * and thus submit is disallowed.
928 */
929 err = -EINVAL;
930 goto clean_up;
931 }
932
933 trace_gk20a_channel_submit_gpfifo(g->name,
934 c->chid,
935 num_entries,
936 flags,
937 fence ? fence->id : 0,
938 fence ? fence->value : 0);
939
940 nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
941 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
942
943 /*
944 * Make sure we have enough space for gpfifo entries. Check cached
945 * values first and then read from HW. If no space, return EAGAIN
946 * and let userspace decide whether to retry the request.
947 */
948 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
949 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
950 err = -EAGAIN;
951 goto clean_up;
952 }
953 }
954
955 if (c->has_timedout) {
956 err = -ETIMEDOUT;
957 goto clean_up;
958 }
959
960 if (need_job_tracking) {
961 err = channel_gk20a_alloc_job(c, &job);
962 if (err)
963 goto clean_up;
964
965 err = gk20a_submit_prepare_syncs(c, fence, job,
966 &wait_cmd, &incr_cmd,
967 &post_fence,
968 need_deferred_cleanup,
969 flags);
970 if (err)
971 goto clean_up_job;
972 }
973
974 gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
975
976 if (wait_cmd)
977 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
978
979 err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
980 num_entries);
981 if (err)
982 goto clean_up_job;
983
984 /*
985 * And here's where we add the incr_cmd we generated earlier. It should
986 * always run!
987 */
988 if (incr_cmd)
989 gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
990
991 if (fence_out)
992 *fence_out = gk20a_fence_get(post_fence);
993
994 if (need_job_tracking)
995 /* TODO! Check for errors... */
996 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
997 gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
998
999 g->ops.fifo.userd_gp_put(g, c);
1000
1001 /* No hw access beyond this point */
1002 if (c->deterministic)
1003 nvgpu_rwsem_up_read(&g->deterministic_busy);
1004
1005 trace_gk20a_channel_submitted_gpfifo(g->name,
1006 c->chid,
1007 num_entries,
1008 flags,
1009 post_fence ? post_fence->syncpt_id : 0,
1010 post_fence ? post_fence->syncpt_value : 0);
1011
1012 nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
1013 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1014
1015 gk20a_fifo_profile_snapshot(profile, PROFILE_END);
1016
1017 nvgpu_log_fn(g, "done");
1018 return err;
1019
1020clean_up_job:
1021 channel_gk20a_free_job(c, job);
1022clean_up:
1023 nvgpu_log_fn(g, "fail");
1024 gk20a_fence_put(post_fence);
1025 if (c->deterministic)
1026 nvgpu_rwsem_up_read(&g->deterministic_busy);
1027 else if (need_deferred_cleanup)
1028 gk20a_idle(g);
1029
1030 return err;
1031}
1032
1033int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c,
1034 struct nvgpu_gpfifo_userdata userdata,
1035 u32 num_entries,
1036 u32 flags,
1037 struct nvgpu_channel_fence *fence,
1038 struct gk20a_fence **fence_out,
1039 struct fifo_profile_gk20a *profile)
1040{
1041 return gk20a_submit_channel_gpfifo(c, NULL, userdata, num_entries,
1042 flags, fence, fence_out, profile);
1043}
1044
1045int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
1046 struct nvgpu_gpfifo_entry *gpfifo,
1047 u32 num_entries,
1048 u32 flags,
1049 struct nvgpu_channel_fence *fence,
1050 struct gk20a_fence **fence_out)
1051{
1052 struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
1053 return gk20a_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
1054 flags, fence, fence_out, NULL);
 }
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
index 43fa492b..87231a79 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.h
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -84,19 +84,4 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 			int runlist_id,
 			bool is_privileged_channel);

-int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c,
-				struct nvgpu_gpfifo_userdata userdata,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out,
-				struct fifo_profile_gk20a *profile);
-
-int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
-				struct nvgpu_gpfifo_entry *gpfifo,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out);
-
 #endif /* __NVGPU_CHANNEL_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index fa6a02d6..7b003b76 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -34,6 +34,7 @@
 #include <nvgpu/barrier.h>
 #include <nvgpu/nvhost.h>
 #include <nvgpu/os_sched.h>
+#include <nvgpu/channel.h>

 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -799,11 +800,11 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		return fd;
 	}

-	userdata.entries = (struct nvgpu_gpfifo_entry __user*)
+	userdata.entries = (struct nvgpu_gpfifo_entry __user *)
 			(uintptr_t)args->gpfifo;
 	userdata.context = NULL;

-	ret = gk20a_submit_channel_gpfifo_user(ch,
+	ret = nvgpu_submit_channel_gpfifo_user(ch,
 			userdata, args->num_entries,
 			submit_flags, &fence, &fence_out, profile);
