author     Konsta Holtta <kholtta@nvidia.com>                   2018-06-25 05:35:42 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-27 21:40:16 -0400
commit     7998233b77a343d002b699d5f348bbeb243e16f5 (patch)
tree       aa24afcc414be8fbccf6991804f69946e2b72525 /drivers/gpu/nvgpu/common
parent     2ac6fb4253fa815ed17f09a01141b938c826dac9 (diff)
gpu: nvgpu: move submit code to common
To finish OS unification of the submit path, move the
gk20a_submit_channel_gpfifo* functions to a file that is also
accessible outside the Linux code.
Also change the prefix of the submit functions from gk20a_ to nvgpu_.
Jira NVGPU-705
Change-Id: I8ca355d1eb69771fb016c7a21fc7f102ca7967d7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760421
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
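
The rename is mechanical for callers: per the commit message the entry
points only swap the gk20a_ prefix for nvgpu_, with signatures otherwise
unchanged. As a sketch, a hypothetical kernel-side call site (the
variable names here are illustrative, not taken from this commit)
changes like so:

	/* before: Linux-specific prefix */
	err = gk20a_submit_channel_gpfifo_kernel(ch, entries, num_entries,
			flags, &fence, &fence_out);

	/* after: common-code prefix */
	err = nvgpu_submit_channel_gpfifo_kernel(ch, entries, num_entries,
			flags, &fence, &fence_out);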
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/submit.c  577
1 file changed, 577 insertions, 0 deletions
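
The heart of the new file is a GPFIFO ring buffer whose entry count is a
power of two: appends that cross the end of the buffer split into two
copies, and the put pointer wraps with a mask instead of a modulo. A
minimal, self-contained sketch of that technique (plain C for reference
while reading the diff below, not driver code; it assumes the caller has
already verified there is enough free space, as the driver does):

	#include <stdint.h>
	#include <string.h>

	struct ring {
		uint64_t *buf;      /* entry storage, entry_num elements */
		uint32_t entry_num; /* must be a power of two */
		uint32_t put;       /* producer index, in entries */
	};

	static void ring_append(struct ring *r, const uint64_t *src, uint32_t n)
	{
		uint32_t start = r->put;
		uint32_t end = start + n; /* exclusive */

		if (end > r->entry_num) {
			/* wrap-around: fill to the end, then continue at the start */
			uint32_t length0 = r->entry_num - start;

			memcpy(r->buf + start, src, length0 * sizeof(*src));
			memcpy(r->buf, src + length0, (n - length0) * sizeof(*src));
		} else {
			memcpy(r->buf + start, src, n * sizeof(*src));
		}

		/* a power-of-two size lets a mask replace the modulo */
		r->put = (r->put + n) & (r->entry_num - 1);
	}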
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
new file mode 100644
index 00000000..daeee608
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -0,0 +1,577 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/channel.h>
#include <nvgpu/ltc.h>
#include <nvgpu/os_sched.h>

#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

#include "gk20a/gk20a.h"
#include "gk20a/channel_gk20a.h"

#include <trace/events/gk20a.h>

/*
 * Handle the submit synchronization - pre-fences and post-fences.
 */
static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
				      struct nvgpu_channel_fence *fence,
				      struct channel_gk20a_job *job,
				      struct priv_cmd_entry **wait_cmd,
				      struct priv_cmd_entry **incr_cmd,
				      struct gk20a_fence **post_fence,
				      bool register_irq,
				      u32 flags)
{
	struct gk20a *g = c->g;
	bool need_sync_fence = false;
	bool new_sync_created = false;
	int wait_fence_fd = -1;
	int err = 0;
	bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);

	if (g->aggressive_sync_destroy_thresh) {
		nvgpu_mutex_acquire(&c->sync_lock);
		if (!c->sync) {
			c->sync = gk20a_channel_sync_create(c, false);
			if (!c->sync) {
				err = -ENOMEM;
				nvgpu_mutex_release(&c->sync_lock);
				goto fail;
			}
			new_sync_created = true;
		}
		nvgpu_atomic_inc(&c->sync->refcount);
		nvgpu_mutex_release(&c->sync_lock);
	}

	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
		err = g->ops.fifo.resetup_ramfc(c);
		if (err)
			goto fail;
	}

	/*
	 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
	 * submission when user requested and the wait hasn't expired.
	 */
	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
		int max_wait_cmds = c->deterministic ? 1 : 0;

		if (!pre_alloc_enabled)
			job->wait_cmd = nvgpu_kzalloc(g,
				sizeof(struct priv_cmd_entry));

		if (!job->wait_cmd) {
			err = -ENOMEM;
			goto fail;
		}

		if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
			wait_fence_fd = fence->id;
			err = c->sync->wait_fd(c->sync, wait_fence_fd,
					job->wait_cmd, max_wait_cmds);
		} else {
			err = c->sync->wait_syncpt(c->sync, fence->id,
					fence->value,
					job->wait_cmd);
		}

		if (err)
			goto clean_up_wait_cmd;

		if (job->wait_cmd->valid)
			*wait_cmd = job->wait_cmd;
	}

	if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
	    (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
		need_sync_fence = true;

	/*
	 * Always generate an increment at the end of a GPFIFO submission. This
	 * is used to keep track of method completion for idle railgating. The
	 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
	 */
	job->post_fence = gk20a_alloc_fence(c);
	if (!job->post_fence) {
		err = -ENOMEM;
		goto clean_up_wait_cmd;
	}
	if (!pre_alloc_enabled)
		job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));

	if (!job->incr_cmd) {
		err = -ENOMEM;
		goto clean_up_post_fence;
	}

	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
				job->post_fence, need_wfi, need_sync_fence,
				register_irq);
	else
		err = c->sync->incr(c->sync, job->incr_cmd,
				job->post_fence, need_sync_fence,
				register_irq);
	if (!err) {
		*incr_cmd = job->incr_cmd;
		*post_fence = job->post_fence;
	} else
		goto clean_up_incr_cmd;

	return 0;

clean_up_incr_cmd:
	free_priv_cmdbuf(c, job->incr_cmd);
	if (!pre_alloc_enabled)
		job->incr_cmd = NULL;
clean_up_post_fence:
	gk20a_fence_put(job->post_fence);
	job->post_fence = NULL;
clean_up_wait_cmd:
	if (job->wait_cmd)
		free_priv_cmdbuf(c, job->wait_cmd);
	if (!pre_alloc_enabled)
		job->wait_cmd = NULL;
fail:
	*wait_cmd = NULL;
	return err;
}
162 | |||
163 | static void nvgpu_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
164 | struct priv_cmd_entry *cmd) | ||
165 | { | ||
166 | struct gk20a *g = c->g; | ||
167 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
168 | struct nvgpu_gpfifo_entry x = { | ||
169 | .entry0 = u64_lo32(cmd->gva), | ||
170 | .entry1 = u64_hi32(cmd->gva) | | ||
171 | pbdma_gp_entry1_length_f(cmd->size) | ||
172 | }; | ||
173 | |||
174 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
175 | &x, sizeof(x)); | ||
176 | |||
177 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
178 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
179 | (u32 *)cmd->mem->cpu_va + cmd->off); | ||
180 | |||
181 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
182 | } | ||
183 | |||
184 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, | ||
185 | struct nvgpu_gpfifo_userdata userdata, | ||
186 | u32 num_entries) | ||
187 | { | ||
188 | struct gk20a *g = c->g; | ||
189 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
190 | u32 gpfifo_size = c->gpfifo.entry_num; | ||
191 | u32 len = num_entries; | ||
192 | u32 start = c->gpfifo.put; | ||
193 | u32 end = start + len; /* exclusive */ | ||
194 | int err; | ||
195 | |||
196 | if (end > gpfifo_size) { | ||
197 | /* wrap-around */ | ||
198 | int length0 = gpfifo_size - start; | ||
199 | int length1 = len - length0; | ||
200 | |||
201 | err = g->os_channel.copy_user_gpfifo( | ||
202 | gpfifo_cpu + start, userdata, | ||
203 | 0, length0); | ||
204 | if (err) | ||
205 | return err; | ||
206 | |||
207 | err = g->os_channel.copy_user_gpfifo( | ||
208 | gpfifo_cpu, userdata, | ||
209 | length0, length1); | ||
210 | if (err) | ||
211 | return err; | ||
212 | } else { | ||
213 | err = g->os_channel.copy_user_gpfifo( | ||
214 | gpfifo_cpu + start, userdata, | ||
215 | 0, len); | ||
216 | if (err) | ||
217 | return err; | ||
218 | } | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	/* in bytes */
	u32 gpfifo_size =
		c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
	u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
	u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
	u32 end = start + len; /* exclusive */

	if (end > gpfifo_size) {
		/* wrap-around */
		int length0 = gpfifo_size - start;
		int length1 = len - length0;
		/* length0 is a byte count, so advance src by bytes,
		 * not by gpfifo entries */
		struct nvgpu_gpfifo_entry *src2 =
			(struct nvgpu_gpfifo_entry *)((u8 *)src + length0);

		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
		nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
	} else {
		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
	}
}
247 | |||
248 | /* | ||
249 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
250 | * splitting into two memcpys to handle wrap-around. | ||
251 | */ | ||
252 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
253 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
254 | struct nvgpu_gpfifo_userdata userdata, | ||
255 | u32 num_entries) | ||
256 | { | ||
257 | struct gk20a *g = c->g; | ||
258 | int err; | ||
259 | |||
260 | if (!kern_gpfifo && !c->gpfifo.pipe) { | ||
261 | /* | ||
262 | * This path (from userspace to sysmem) is special in order to | ||
263 | * avoid two copies unnecessarily (from user to pipe, then from | ||
264 | * pipe to gpu sysmem buffer). | ||
265 | */ | ||
266 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, | ||
267 | num_entries); | ||
268 | if (err) | ||
269 | return err; | ||
270 | } else if (!kern_gpfifo) { | ||
271 | /* from userspace to vidmem, use the common path */ | ||
272 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | ||
273 | 0, num_entries); | ||
274 | if (err) | ||
275 | return err; | ||
276 | |||
277 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, | ||
278 | num_entries); | ||
279 | } else { | ||
280 | /* from kernel to either sysmem or vidmem, don't need | ||
281 | * copy_user_gpfifo so use the common path */ | ||
282 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); | ||
283 | } | ||
284 | |||
285 | trace_write_pushbuffers(c, num_entries); | ||
286 | |||
287 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
288 | (c->gpfifo.entry_num - 1); | ||
289 | |||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c, | ||
294 | struct nvgpu_gpfifo_entry *gpfifo, | ||
295 | struct nvgpu_gpfifo_userdata userdata, | ||
296 | u32 num_entries, | ||
297 | u32 flags, | ||
298 | struct nvgpu_channel_fence *fence, | ||
299 | struct gk20a_fence **fence_out, | ||
300 | struct fifo_profile_gk20a *profile) | ||
301 | { | ||
302 | struct gk20a *g = c->g; | ||
303 | struct priv_cmd_entry *wait_cmd = NULL; | ||
304 | struct priv_cmd_entry *incr_cmd = NULL; | ||
305 | struct gk20a_fence *post_fence = NULL; | ||
306 | struct channel_gk20a_job *job = NULL; | ||
307 | /* we might need two extra gpfifo entries - one for pre fence | ||
308 | * and one for post fence. */ | ||
309 | const int extra_entries = 2; | ||
310 | bool skip_buffer_refcounting = (flags & | ||
311 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
312 | int err = 0; | ||
313 | bool need_job_tracking; | ||
314 | bool need_deferred_cleanup = false; | ||
315 | |||
316 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
317 | return -ENODEV; | ||
318 | |||
319 | if (c->has_timedout) | ||
320 | return -ETIMEDOUT; | ||
321 | |||
322 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
323 | return -ENOMEM; | ||
324 | |||
	/*
	 * The fifo is not large enough for the request; return an error
	 * immediately. The kernel can insert gpfifo entries before and after
	 * the user gpfifos, so add extra_entries to the user request. Also,
	 * HW with a fifo of size N can accept only N-1 entries, hence the
	 * condition below.
	 */
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		nvgpu_err(g, "not enough gpfifo space allocated");
		return -ENOMEM;
	}
333 | |||
334 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
335 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
336 | !fence) | ||
337 | return -EINVAL; | ||
338 | |||
339 | /* an address space needs to have been bound at this point. */ | ||
340 | if (!gk20a_channel_as_bound(c)) { | ||
341 | nvgpu_err(g, | ||
342 | "not bound to an address space at time of gpfifo" | ||
343 | " submission."); | ||
344 | return -EINVAL; | ||
345 | } | ||
346 | |||
347 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
348 | |||
349 | /* update debug settings */ | ||
350 | nvgpu_ltc_sync_enabled(g); | ||
351 | |||
352 | nvgpu_log_info(g, "channel %d", c->chid); | ||
353 | |||
354 | /* | ||
355 | * Job tracking is necessary for any of the following conditions: | ||
356 | * - pre- or post-fence functionality | ||
357 | * - channel wdt | ||
358 | * - GPU rail-gating with non-deterministic channels | ||
359 | * - buffer refcounting | ||
360 | * | ||
361 | * If none of the conditions are met, then job tracking is not | ||
362 | * required and a fast submit can be done (ie. only need to write | ||
363 | * out userspace GPFIFO entries and update GP_PUT). | ||
364 | */ | ||
365 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
366 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
367 | c->timeout.enabled || | ||
368 | (g->can_railgate && !c->deterministic) || | ||
369 | !skip_buffer_refcounting; | ||
370 | |||
371 | if (need_job_tracking) { | ||
372 | bool need_sync_framework = false; | ||
373 | |||
374 | /* | ||
375 | * If the channel is to have deterministic latency and | ||
376 | * job tracking is required, the channel must have | ||
377 | * pre-allocated resources. Otherwise, we fail the submit here | ||
378 | */ | ||
379 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
380 | return -EINVAL; | ||
381 | |||
382 | need_sync_framework = | ||
383 | gk20a_channel_sync_needs_sync_framework(g) || | ||
384 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
385 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
386 | |||
387 | /* | ||
388 | * Deferred clean-up is necessary for any of the following | ||
389 | * conditions: | ||
390 | * - channel's deterministic flag is not set | ||
391 | * - dependency on sync framework, which could make the | ||
392 | * behavior of the clean-up operation non-deterministic | ||
393 | * (should not be performed in the submit path) | ||
394 | * - channel wdt | ||
395 | * - GPU rail-gating with non-deterministic channels | ||
396 | * - buffer refcounting | ||
397 | * | ||
398 | * If none of the conditions are met, then deferred clean-up | ||
399 | * is not required, and we clean-up one job-tracking | ||
400 | * resource in the submit path. | ||
401 | */ | ||
402 | need_deferred_cleanup = !c->deterministic || | ||
403 | need_sync_framework || | ||
404 | c->timeout.enabled || | ||
405 | (g->can_railgate && | ||
406 | !c->deterministic) || | ||
407 | !skip_buffer_refcounting; | ||
408 | |||
409 | /* | ||
410 | * For deterministic channels, we don't allow deferred clean_up | ||
411 | * processing to occur. In cases we hit this, we fail the submit | ||
412 | */ | ||
413 | if (c->deterministic && need_deferred_cleanup) | ||
414 | return -EINVAL; | ||
415 | |||
416 | if (!c->deterministic) { | ||
417 | /* | ||
418 | * Get a power ref unless this is a deterministic | ||
419 | * channel that holds them during the channel lifetime. | ||
420 | * This one is released by gk20a_channel_clean_up_jobs, | ||
421 | * via syncpt or sema interrupt, whichever is used. | ||
422 | */ | ||
423 | err = gk20a_busy(g); | ||
424 | if (err) { | ||
425 | nvgpu_err(g, | ||
426 | "failed to host gk20a to submit gpfifo"); | ||
427 | nvgpu_print_current(g, NULL, NVGPU_ERROR); | ||
428 | return err; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | if (!need_deferred_cleanup) { | ||
433 | /* clean up a single job */ | ||
434 | gk20a_channel_clean_up_jobs(c, false); | ||
435 | } | ||
436 | } | ||
437 | |||
438 | |||
439 | /* Grab access to HW to deal with do_idle */ | ||
440 | if (c->deterministic) | ||
441 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
442 | |||
443 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
444 | /* | ||
445 | * Nope - this channel has dropped its own power ref. As | ||
446 | * deterministic submits don't hold power on per each submitted | ||
447 | * job like normal ones do, the GPU might railgate any time now | ||
448 | * and thus submit is disallowed. | ||
449 | */ | ||
450 | err = -EINVAL; | ||
451 | goto clean_up; | ||
452 | } | ||
453 | |||
454 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
455 | c->chid, | ||
456 | num_entries, | ||
457 | flags, | ||
458 | fence ? fence->id : 0, | ||
459 | fence ? fence->value : 0); | ||
460 | |||
461 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
462 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
463 | |||
	/*
	 * Make sure we have enough space for gpfifo entries. Check cached
	 * values first and then read from HW. If there is no space, return
	 * EAGAIN and let userspace decide whether to retry the request.
	 */
	if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
		if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
			err = -EAGAIN;
			goto clean_up;
		}
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (need_job_tracking) {
		err = channel_gk20a_alloc_job(c, &job);
		if (err)
			goto clean_up;

		err = nvgpu_submit_prepare_syncs(c, fence, job,
						 &wait_cmd, &incr_cmd,
						 &post_fence,
						 need_deferred_cleanup,
						 flags);
		if (err)
			goto clean_up_job;
	}

	gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);

	if (wait_cmd)
		nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
			num_entries);
	if (err)
		goto clean_up_job;

	/*
	 * And here's where we add the incr_cmd we generated earlier. It should
	 * always run!
	 */
	if (incr_cmd)
		nvgpu_submit_append_priv_cmdbuf(c, incr_cmd);

	if (fence_out)
		*fence_out = gk20a_fence_get(post_fence);

	if (need_job_tracking)
		/* TODO! Check for errors... */
		gk20a_channel_add_job(c, job, skip_buffer_refcounting);
	gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);

	g->ops.fifo.userd_gp_put(g, c);

	/* No hw access beyond this point */
	if (c->deterministic)
		nvgpu_rwsem_up_read(&g->deterministic_busy);

	trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid,
				num_entries,
				flags,
				post_fence ? post_fence->syncpt_id : 0,
				post_fence ? post_fence->syncpt_value : 0);

	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	gk20a_fifo_profile_snapshot(profile, PROFILE_END);

	nvgpu_log_fn(g, "done");
	return err;

clean_up_job:
	channel_gk20a_free_job(c, job);
clean_up:
	nvgpu_log_fn(g, "fail");
	gk20a_fence_put(post_fence);
	if (c->deterministic)
		nvgpu_rwsem_up_read(&g->deterministic_busy);
	else if (need_deferred_cleanup)
		gk20a_idle(g);

	return err;
}
553 | |||
554 | int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
555 | struct nvgpu_gpfifo_userdata userdata, | ||
556 | u32 num_entries, | ||
557 | u32 flags, | ||
558 | struct nvgpu_channel_fence *fence, | ||
559 | struct gk20a_fence **fence_out, | ||
560 | struct fifo_profile_gk20a *profile) | ||
561 | { | ||
562 | return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, | ||
563 | flags, fence, fence_out, profile); | ||
564 | } | ||
565 | |||
566 | int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
567 | struct nvgpu_gpfifo_entry *gpfifo, | ||
568 | u32 num_entries, | ||
569 | u32 flags, | ||
570 | struct nvgpu_channel_fence *fence, | ||
571 | struct gk20a_fence **fence_out) | ||
572 | { | ||
573 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; | ||
574 | |||
575 | return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, | ||
576 | flags, fence, fence_out, NULL); | ||
577 | } | ||
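
For completeness, a hypothetical in-kernel caller of the new kernel
entry point might look like the sketch below. The channel setup, entry
construction, and error handling are assumptions for illustration, not
part of this commit; only nvgpu_submit_channel_gpfifo_kernel,
NVGPU_SUBMIT_FLAGS_FENCE_GET, and gk20a_fence_put come from the code
above:

	/*
	 * Hypothetical caller, for illustration only. Assumes a channel
	 * that already has a gpfifo allocated and an address space bound,
	 * plus a caller-built array of gpfifo entries.
	 */
	static int example_submit(struct channel_gk20a *ch,
				  struct nvgpu_gpfifo_entry *entries, u32 n)
	{
		struct nvgpu_channel_fence fence = { 0 };
		struct gk20a_fence *post_fence = NULL;
		int err;

		/* Request a post-fence so completion can be tracked. */
		err = nvgpu_submit_channel_gpfifo_kernel(ch, entries, n,
				NVGPU_SUBMIT_FLAGS_FENCE_GET, &fence,
				&post_fence);
		if (err)
			return err;

		/* ... wait on or hand off post_fence, then drop the ref ... */
		gk20a_fence_put(post_fence);
		return 0;
	}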