author     Konsta Holtta <kholtta@nvidia.com>  2018-06-25 05:35:42 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-27 21:40:16 -0400
commit     7998233b77a343d002b699d5f348bbeb243e16f5 (patch)
tree       aa24afcc414be8fbccf6991804f69946e2b72525 /drivers/gpu/nvgpu/common/fifo/submit.c
parent     2ac6fb4253fa815ed17f09a01141b938c826dac9 (diff)
gpu: nvgpu: move submit code to common
To finish OS unification of the submit path, move the
gk20a_submit_channel_gpfifo* functions to a file that's accessible also
outside Linux code. Also change the prefix of the submit functions from
gk20a_ to nvgpu_.

Jira NVGPU-705
Change-Id: I8ca355d1eb69771fb016c7a21fc7f102ca7967d7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760421
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
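For reference, callers see the prefix change roughly as follows (illustrative
sketch only; the old argument list is abbreviated, the new prototypes are the
ones added in the file below):

	/* before: Linux-only entry point */
	err = gk20a_submit_channel_gpfifo(c, ...);

	/* after: common entry points, also usable outside the Linux code */
	err = nvgpu_submit_channel_gpfifo_user(c, userdata, num_entries,
			flags, fence, &fence_out, profile);
	err = nvgpu_submit_channel_gpfifo_kernel(c, gpfifo, num_entries,
			flags, fence, &fence_out);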
Diffstat (limited to 'drivers/gpu/nvgpu/common/fifo/submit.c')
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/submit.c  577
1 files changed, 577 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
new file mode 100644
index 00000000..daeee608
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -0,0 +1,577 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/channel.h>
#include <nvgpu/ltc.h>
#include <nvgpu/os_sched.h>

#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

#include "gk20a/gk20a.h"
#include "gk20a/channel_gk20a.h"

#include <trace/events/gk20a.h>

/*
 * Handle the submit synchronization - pre-fences and post-fences.
 */
static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
				      struct nvgpu_channel_fence *fence,
				      struct channel_gk20a_job *job,
				      struct priv_cmd_entry **wait_cmd,
				      struct priv_cmd_entry **incr_cmd,
				      struct gk20a_fence **post_fence,
				      bool register_irq,
				      u32 flags)
{
	struct gk20a *g = c->g;
	bool need_sync_fence = false;
	bool new_sync_created = false;
	int wait_fence_fd = -1;
	int err = 0;
	bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);

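	/*
	 * With aggressive sync destroy enabled, the sync object is created on
	 * demand and reference-counted per submit so that it can be torn down
	 * again once the channel goes idle.
	 */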
	if (g->aggressive_sync_destroy_thresh) {
		nvgpu_mutex_acquire(&c->sync_lock);
		if (!c->sync) {
			c->sync = gk20a_channel_sync_create(c, false);
			if (!c->sync) {
				err = -ENOMEM;
				nvgpu_mutex_release(&c->sync_lock);
				goto fail;
			}
			new_sync_created = true;
		}
		nvgpu_atomic_inc(&c->sync->refcount);
		nvgpu_mutex_release(&c->sync_lock);
	}

	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
		err = g->ops.fifo.resetup_ramfc(c);
		if (err)
			goto fail;
	}

	/*
	 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
	 * submission when user requested and the wait hasn't expired.
	 */
	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
		int max_wait_cmds = c->deterministic ? 1 : 0;

		if (!pre_alloc_enabled)
			job->wait_cmd = nvgpu_kzalloc(g,
				sizeof(struct priv_cmd_entry));

		if (!job->wait_cmd) {
			err = -ENOMEM;
			goto fail;
		}

		if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
			wait_fence_fd = fence->id;
			err = c->sync->wait_fd(c->sync, wait_fence_fd,
					job->wait_cmd, max_wait_cmds);
		} else {
			err = c->sync->wait_syncpt(c->sync, fence->id,
					fence->value,
					job->wait_cmd);
		}

		if (err)
			goto clean_up_wait_cmd;

		if (job->wait_cmd->valid)
			*wait_cmd = job->wait_cmd;
	}

	if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
	    (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
		need_sync_fence = true;

	/*
	 * Always generate an increment at the end of a GPFIFO submission. This
	 * is used to keep track of method completion for idle railgating. The
	 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
	 */
	job->post_fence = gk20a_alloc_fence(c);
	if (!job->post_fence) {
		err = -ENOMEM;
		goto clean_up_wait_cmd;
	}
	if (!pre_alloc_enabled)
		job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));

	if (!job->incr_cmd) {
		err = -ENOMEM;
		goto clean_up_post_fence;
	}

	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
				job->post_fence, need_wfi, need_sync_fence,
				register_irq);
	else
		err = c->sync->incr(c->sync, job->incr_cmd,
				job->post_fence, need_sync_fence,
				register_irq);
	if (!err) {
		*incr_cmd = job->incr_cmd;
		*post_fence = job->post_fence;
	} else
		goto clean_up_incr_cmd;

	return 0;

clean_up_incr_cmd:
	free_priv_cmdbuf(c, job->incr_cmd);
	if (!pre_alloc_enabled)
		job->incr_cmd = NULL;
clean_up_post_fence:
	gk20a_fence_put(job->post_fence);
	job->post_fence = NULL;
clean_up_wait_cmd:
	if (job->wait_cmd)
		free_priv_cmdbuf(c, job->wait_cmd);
	if (!pre_alloc_enabled)
		job->wait_cmd = NULL;
fail:
	*wait_cmd = NULL;
	return err;
}

static void nvgpu_submit_append_priv_cmdbuf(struct channel_gk20a *c,
		struct priv_cmd_entry *cmd)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
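	/*
	 * A gpfifo entry carries the GPU virtual address of the pushbuffer
	 * segment split across entry0/entry1, plus its length in the entry1
	 * length field.
	 */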
	struct nvgpu_gpfifo_entry x = {
		.entry0 = u64_lo32(cmd->gva),
		.entry1 = u64_hi32(cmd->gva) |
			pbdma_gp_entry1_length_f(cmd->size)
	};

	nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
			&x, sizeof(x));

	if (cmd->mem->aperture == APERTURE_SYSMEM)
		trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
				(u32 *)cmd->mem->cpu_va + cmd->off);

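	/* entry_num is a power of two; the mask wraps put around the ring */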
	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
}

static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
	u32 gpfifo_size = c->gpfifo.entry_num;
	u32 len = num_entries;
	u32 start = c->gpfifo.put;
	u32 end = start + len; /* exclusive */
	int err;

	if (end > gpfifo_size) {
		/* wrap-around */
		int length0 = gpfifo_size - start;
		int length1 = len - length0;

		err = g->os_channel.copy_user_gpfifo(
				gpfifo_cpu + start, userdata,
				0, length0);
		if (err)
			return err;

		err = g->os_channel.copy_user_gpfifo(
				gpfifo_cpu, userdata,
				length0, length1);
		if (err)
			return err;
	} else {
		err = g->os_channel.copy_user_gpfifo(
				gpfifo_cpu + start, userdata,
				0, len);
		if (err)
			return err;
	}

	return 0;
}

static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	/* in bytes */
	u32 gpfifo_size =
		c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
	u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
	u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
	u32 end = start + len; /* exclusive */

	if (end > gpfifo_size) {
		/* wrap-around */
		int length0 = gpfifo_size - start;
		int length1 = len - length0;
		struct nvgpu_gpfifo_entry *src2 = src + length0;

		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
		nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
	} else {
		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
	}
}

/*
 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
 * splitting into two memcpys to handle wrap-around.
 */
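/*
 * For example (illustrative): with a 128-entry ring, put == 126 and five
 * entries to append, the first copy fills slots 126..127 and the second
 * fills slots 0..2.
 */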
static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *kern_gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	struct gk20a *g = c->g;
	int err;

	if (!kern_gpfifo && !c->gpfifo.pipe) {
		/*
		 * This path (from userspace to sysmem) is special in order to
		 * avoid two copies unnecessarily (from user to pipe, then from
		 * pipe to gpu sysmem buffer).
		 */
		err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
				num_entries);
		if (err)
			return err;
	} else if (!kern_gpfifo) {
		/* from userspace to vidmem, use the common path */
		err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
				0, num_entries);
		if (err)
			return err;

		nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
				num_entries);
	} else {
		/* from kernel to either sysmem or vidmem, don't need
		 * copy_user_gpfifo so use the common path */
		nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
	}

	trace_write_pushbuffers(c, num_entries);

	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1);

	return 0;
}

static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out,
				struct fifo_profile_gk20a *profile)
{
	struct gk20a *g = c->g;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_fence *post_fence = NULL;
	struct channel_gk20a_job *job = NULL;
	/* we might need two extra gpfifo entries - one for pre fence
	 * and one for post fence. */
	const int extra_entries = 2;
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
	int err = 0;
	bool need_job_tracking;
	bool need_deferred_cleanup = false;

	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
		return -ENODEV;

	if (c->has_timedout)
		return -ETIMEDOUT;

	if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
		return -ENOMEM;

	/* fifo not large enough for request. Return error immediately.
	 * Kernel can insert gpfifo entries before and after user gpfifos.
	 * So, add extra_entries in user request. Also, HW with fifo size N
	 * can accept only N-1 entries and so the below condition */
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		nvgpu_err(g, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		nvgpu_err(g,
			"not bound to an address space at time of gpfifo"
			" submission.");
		return -EINVAL;
	}

	gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);

	/* update debug settings */
	nvgpu_ltc_sync_enabled(g);

	nvgpu_log_info(g, "channel %d", c->chid);

	/*
	 * Job tracking is necessary for any of the following conditions:
	 *  - pre- or post-fence functionality
	 *  - channel wdt
	 *  - GPU rail-gating with non-deterministic channels
	 *  - buffer refcounting
	 *
	 * If none of the conditions are met, then job tracking is not
	 * required and a fast submit can be done (ie. only need to write
	 * out userspace GPFIFO entries and update GP_PUT).
	 */
	need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
			(flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
			c->timeout.enabled ||
			(g->can_railgate && !c->deterministic) ||
			!skip_buffer_refcounting;

	if (need_job_tracking) {
		bool need_sync_framework = false;

		/*
		 * If the channel is to have deterministic latency and
		 * job tracking is required, the channel must have
		 * pre-allocated resources. Otherwise, we fail the submit here
		 */
		if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
			return -EINVAL;

		need_sync_framework =
			gk20a_channel_sync_needs_sync_framework(g) ||
			(flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
			 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);

		/*
		 * Deferred clean-up is necessary for any of the following
		 * conditions:
		 * - channel's deterministic flag is not set
		 * - dependency on sync framework, which could make the
		 *   behavior of the clean-up operation non-deterministic
		 *   (should not be performed in the submit path)
		 * - channel wdt
		 * - GPU rail-gating with non-deterministic channels
		 * - buffer refcounting
		 *
		 * If none of the conditions are met, then deferred clean-up
		 * is not required, and we clean-up one job-tracking
		 * resource in the submit path.
		 */
		need_deferred_cleanup = !c->deterministic ||
					need_sync_framework ||
					c->timeout.enabled ||
					(g->can_railgate &&
					 !c->deterministic) ||
					!skip_buffer_refcounting;

		/*
		 * For deterministic channels, we don't allow deferred clean_up
		 * processing to occur. In cases we hit this, we fail the submit
		 */
		if (c->deterministic && need_deferred_cleanup)
			return -EINVAL;

		if (!c->deterministic) {
			/*
			 * Get a power ref unless this is a deterministic
			 * channel that holds them during the channel lifetime.
			 * This one is released by gk20a_channel_clean_up_jobs,
			 * via syncpt or sema interrupt, whichever is used.
			 */
			err = gk20a_busy(g);
			if (err) {
				nvgpu_err(g,
					"failed to host gk20a to submit gpfifo");
				nvgpu_print_current(g, NULL, NVGPU_ERROR);
				return err;
			}
		}

		if (!need_deferred_cleanup) {
			/* clean up a single job */
			gk20a_channel_clean_up_jobs(c, false);
		}
	}


	/* Grab access to HW to deal with do_idle */
	if (c->deterministic)
		nvgpu_rwsem_down_read(&g->deterministic_busy);

	if (c->deterministic && c->deterministic_railgate_allowed) {
		/*
		 * Nope - this channel has dropped its own power ref. As
		 * deterministic submits don't hold power on per each submitted
		 * job like normal ones do, the GPU might railgate any time now
		 * and thus submit is disallowed.
		 */
		err = -EINVAL;
		goto clean_up;
	}

	trace_gk20a_channel_submit_gpfifo(g->name,
					  c->chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);

	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/*
	 * Make sure we have enough space for gpfifo entries. Check cached
	 * values first and then read from HW. If no space, return EAGAIN
	 * and let userspace decide to re-try request or not.
	 */
	if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
		if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
			err = -EAGAIN;
			goto clean_up;
		}
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (need_job_tracking) {
		err = channel_gk20a_alloc_job(c, &job);
		if (err)
			goto clean_up;

		err = nvgpu_submit_prepare_syncs(c, fence, job,
						 &wait_cmd, &incr_cmd,
						 &post_fence,
						 need_deferred_cleanup,
						 flags);
		if (err)
			goto clean_up_job;
	}

	gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);

	if (wait_cmd)
		nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
			num_entries);
	if (err)
		goto clean_up_job;

	/*
	 * And here's where we add the incr_cmd we generated earlier. It should
	 * always run!
	 */
	if (incr_cmd)
		nvgpu_submit_append_priv_cmdbuf(c, incr_cmd);

	if (fence_out)
		*fence_out = gk20a_fence_get(post_fence);

	if (need_job_tracking)
		/* TODO! Check for errors... */
		gk20a_channel_add_job(c, job, skip_buffer_refcounting);
	gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);

	g->ops.fifo.userd_gp_put(g, c);

	/* No hw access beyond this point */
	if (c->deterministic)
		nvgpu_rwsem_up_read(&g->deterministic_busy);

	trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid,
				num_entries,
				flags,
				post_fence ? post_fence->syncpt_id : 0,
				post_fence ? post_fence->syncpt_value : 0);

	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	gk20a_fifo_profile_snapshot(profile, PROFILE_END);

	nvgpu_log_fn(g, "done");
	return err;

clean_up_job:
	channel_gk20a_free_job(c, job);
clean_up:
	nvgpu_log_fn(g, "fail");
	gk20a_fence_put(post_fence);
	if (c->deterministic)
		nvgpu_rwsem_up_read(&g->deterministic_busy);
	else if (need_deferred_cleanup)
		gk20a_idle(g);

	return err;
}

int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out,
				struct fifo_profile_gk20a *profile)
{
	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
			flags, fence, fence_out, profile);
}

int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out)
{
	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };

	return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
			flags, fence, fence_out, NULL);
}
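As a rough usage sketch of the new kernel-internal entry point (hypothetical
caller; channel setup, gpfifo entry encoding and error handling are omitted,
and a non-NULL fence argument is required with NVGPU_SUBMIT_FLAGS_FENCE_GET
per the check above):

	struct nvgpu_gpfifo_entry entry = {0}; /* encode pushbuf GPU VA and length here */
	struct nvgpu_channel_fence fence = {0};
	struct gk20a_fence *post_fence = NULL;
	int err;

	err = nvgpu_submit_channel_gpfifo_kernel(c, &entry, 1,
			NVGPU_SUBMIT_FLAGS_FENCE_GET, &fence, &post_fence);
	if (err == 0 && post_fence != NULL) {
		/* wait on or export the post-fence, then drop the reference */
		gk20a_fence_put(post_fence);
	}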