diff options
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/Makefile.sources | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/fifo/submit.c | 577 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/channel.h | 52 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/cde.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ce2.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/channel.c | 551 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/channel.h | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 5 | ||||
-rw-r--r-- | include/trace/events/gk20a.h | 4 |
12 files changed, 646 insertions, 569 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index faf17a91..61636ff5 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -177,6 +177,7 @@ nvgpu-y += \ | |||
177 | common/clock_gating/gv11b_gating_reglist.o \ | 177 | common/clock_gating/gv11b_gating_reglist.o \ |
178 | common/sim.o \ | 178 | common/sim.o \ |
179 | common/sim_pci.o \ | 179 | common/sim_pci.o \ |
180 | common/fifo/submit.o \ | ||
180 | gk20a/gk20a.o \ | 181 | gk20a/gk20a.o \ |
181 | gk20a/ce2_gk20a.o \ | 182 | gk20a/ce2_gk20a.o \ |
182 | gk20a/fifo_gk20a.o \ | 183 | gk20a/fifo_gk20a.o \ |
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index cad9c1e3..942fddea 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources | |||
@@ -80,6 +80,7 @@ srcs := common/mm/nvgpu_allocator.c \ | |||
80 | common/clock_gating/gv11b_gating_reglist.c \ | 80 | common/clock_gating/gv11b_gating_reglist.c \ |
81 | common/clock_gating/gp106_gating_reglist.c \ | 81 | common/clock_gating/gp106_gating_reglist.c \ |
82 | common/clock_gating/gv100_gating_reglist.c \ | 82 | common/clock_gating/gv100_gating_reglist.c \ |
83 | common/fifo/submit.c \ | ||
83 | boardobj/boardobj.c \ | 84 | boardobj/boardobj.c \ |
84 | boardobj/boardobjgrp.c \ | 85 | boardobj/boardobjgrp.c \ |
85 | boardobj/boardobjgrpmask.c \ | 86 | boardobj/boardobjgrpmask.c \ |
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c new file mode 100644 index 00000000..daeee608 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/submit.c | |||
@@ -0,0 +1,577 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/channel.h> | ||
24 | #include <nvgpu/ltc.h> | ||
25 | #include <nvgpu/os_sched.h> | ||
26 | |||
27 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
28 | |||
29 | #include "gk20a/gk20a.h" | ||
30 | #include "gk20a/channel_gk20a.h" | ||
31 | |||
32 | #include <trace/events/gk20a.h> | ||
33 | |||
34 | /* | ||
35 | * Handle the submit synchronization - pre-fences and post-fences. | ||
36 | */ | ||
37 | static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c, | ||
38 | struct nvgpu_channel_fence *fence, | ||
39 | struct channel_gk20a_job *job, | ||
40 | struct priv_cmd_entry **wait_cmd, | ||
41 | struct priv_cmd_entry **incr_cmd, | ||
42 | struct gk20a_fence **post_fence, | ||
43 | bool register_irq, | ||
44 | u32 flags) | ||
45 | { | ||
46 | struct gk20a *g = c->g; | ||
47 | bool need_sync_fence = false; | ||
48 | bool new_sync_created = false; | ||
49 | int wait_fence_fd = -1; | ||
50 | int err = 0; | ||
51 | bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); | ||
52 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
53 | |||
54 | if (g->aggressive_sync_destroy_thresh) { | ||
55 | nvgpu_mutex_acquire(&c->sync_lock); | ||
56 | if (!c->sync) { | ||
57 | c->sync = gk20a_channel_sync_create(c, false); | ||
58 | if (!c->sync) { | ||
59 | err = -ENOMEM; | ||
60 | nvgpu_mutex_release(&c->sync_lock); | ||
61 | goto fail; | ||
62 | } | ||
63 | new_sync_created = true; | ||
64 | } | ||
65 | nvgpu_atomic_inc(&c->sync->refcount); | ||
66 | nvgpu_mutex_release(&c->sync_lock); | ||
67 | } | ||
68 | |||
69 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | ||
70 | err = g->ops.fifo.resetup_ramfc(c); | ||
71 | if (err) | ||
72 | goto fail; | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * Optionally insert syncpt/semaphore wait in the beginning of gpfifo | ||
77 | * submission when user requested and the wait hasn't expired. | ||
78 | */ | ||
79 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { | ||
80 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
81 | |||
82 | if (!pre_alloc_enabled) | ||
83 | job->wait_cmd = nvgpu_kzalloc(g, | ||
84 | sizeof(struct priv_cmd_entry)); | ||
85 | |||
86 | if (!job->wait_cmd) { | ||
87 | err = -ENOMEM; | ||
88 | goto fail; | ||
89 | } | ||
90 | |||
91 | if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
92 | wait_fence_fd = fence->id; | ||
93 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | ||
94 | job->wait_cmd, max_wait_cmds); | ||
95 | } else { | ||
96 | err = c->sync->wait_syncpt(c->sync, fence->id, | ||
97 | fence->value, | ||
98 | job->wait_cmd); | ||
99 | } | ||
100 | |||
101 | if (err) | ||
102 | goto clean_up_wait_cmd; | ||
103 | |||
104 | if (job->wait_cmd->valid) | ||
105 | *wait_cmd = job->wait_cmd; | ||
106 | } | ||
107 | |||
108 | if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && | ||
109 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) | ||
110 | need_sync_fence = true; | ||
111 | |||
112 | /* | ||
113 | * Always generate an increment at the end of a GPFIFO submission. This | ||
114 | * is used to keep track of method completion for idle railgating. The | ||
115 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | ||
116 | */ | ||
117 | job->post_fence = gk20a_alloc_fence(c); | ||
118 | if (!job->post_fence) { | ||
119 | err = -ENOMEM; | ||
120 | goto clean_up_wait_cmd; | ||
121 | } | ||
122 | if (!pre_alloc_enabled) | ||
123 | job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); | ||
124 | |||
125 | if (!job->incr_cmd) { | ||
126 | err = -ENOMEM; | ||
127 | goto clean_up_post_fence; | ||
128 | } | ||
129 | |||
130 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
131 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | ||
132 | job->post_fence, need_wfi, need_sync_fence, | ||
133 | register_irq); | ||
134 | else | ||
135 | err = c->sync->incr(c->sync, job->incr_cmd, | ||
136 | job->post_fence, need_sync_fence, | ||
137 | register_irq); | ||
138 | if (!err) { | ||
139 | *incr_cmd = job->incr_cmd; | ||
140 | *post_fence = job->post_fence; | ||
141 | } else | ||
142 | goto clean_up_incr_cmd; | ||
143 | |||
144 | return 0; | ||
145 | |||
146 | clean_up_incr_cmd: | ||
147 | free_priv_cmdbuf(c, job->incr_cmd); | ||
148 | if (!pre_alloc_enabled) | ||
149 | job->incr_cmd = NULL; | ||
150 | clean_up_post_fence: | ||
151 | gk20a_fence_put(job->post_fence); | ||
152 | job->post_fence = NULL; | ||
153 | clean_up_wait_cmd: | ||
154 | if (job->wait_cmd) | ||
155 | free_priv_cmdbuf(c, job->wait_cmd); | ||
156 | if (!pre_alloc_enabled) | ||
157 | job->wait_cmd = NULL; | ||
158 | fail: | ||
159 | *wait_cmd = NULL; | ||
160 | return err; | ||
161 | } | ||
162 | |||
163 | static void nvgpu_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
164 | struct priv_cmd_entry *cmd) | ||
165 | { | ||
166 | struct gk20a *g = c->g; | ||
167 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
168 | struct nvgpu_gpfifo_entry x = { | ||
169 | .entry0 = u64_lo32(cmd->gva), | ||
170 | .entry1 = u64_hi32(cmd->gva) | | ||
171 | pbdma_gp_entry1_length_f(cmd->size) | ||
172 | }; | ||
173 | |||
174 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
175 | &x, sizeof(x)); | ||
176 | |||
177 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
178 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
179 | (u32 *)cmd->mem->cpu_va + cmd->off); | ||
180 | |||
181 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
182 | } | ||
183 | |||
184 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, | ||
185 | struct nvgpu_gpfifo_userdata userdata, | ||
186 | u32 num_entries) | ||
187 | { | ||
188 | struct gk20a *g = c->g; | ||
189 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
190 | u32 gpfifo_size = c->gpfifo.entry_num; | ||
191 | u32 len = num_entries; | ||
192 | u32 start = c->gpfifo.put; | ||
193 | u32 end = start + len; /* exclusive */ | ||
194 | int err; | ||
195 | |||
196 | if (end > gpfifo_size) { | ||
197 | /* wrap-around */ | ||
198 | int length0 = gpfifo_size - start; | ||
199 | int length1 = len - length0; | ||
200 | |||
201 | err = g->os_channel.copy_user_gpfifo( | ||
202 | gpfifo_cpu + start, userdata, | ||
203 | 0, length0); | ||
204 | if (err) | ||
205 | return err; | ||
206 | |||
207 | err = g->os_channel.copy_user_gpfifo( | ||
208 | gpfifo_cpu, userdata, | ||
209 | length0, length1); | ||
210 | if (err) | ||
211 | return err; | ||
212 | } else { | ||
213 | err = g->os_channel.copy_user_gpfifo( | ||
214 | gpfifo_cpu + start, userdata, | ||
215 | 0, len); | ||
216 | if (err) | ||
217 | return err; | ||
218 | } | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c, | ||
224 | struct nvgpu_gpfifo_entry *src, u32 num_entries) | ||
225 | { | ||
226 | struct gk20a *g = c->g; | ||
227 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
228 | /* in bytes */ | ||
229 | u32 gpfifo_size = | ||
230 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
231 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
232 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
233 | u32 end = start + len; /* exclusive */ | ||
234 | |||
235 | if (end > gpfifo_size) { | ||
236 | /* wrap-around */ | ||
237 | int length0 = gpfifo_size - start; | ||
238 | int length1 = len - length0; | ||
239 | struct nvgpu_gpfifo_entry *src2 = src + length0; | ||
240 | |||
241 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); | ||
242 | nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); | ||
243 | } else { | ||
244 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | /* | ||
249 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
250 | * splitting into two memcpys to handle wrap-around. | ||
251 | */ | ||
252 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
253 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
254 | struct nvgpu_gpfifo_userdata userdata, | ||
255 | u32 num_entries) | ||
256 | { | ||
257 | struct gk20a *g = c->g; | ||
258 | int err; | ||
259 | |||
260 | if (!kern_gpfifo && !c->gpfifo.pipe) { | ||
261 | /* | ||
262 | * This path (from userspace to sysmem) is special in order to | ||
263 | * avoid two copies unnecessarily (from user to pipe, then from | ||
264 | * pipe to gpu sysmem buffer). | ||
265 | */ | ||
266 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, | ||
267 | num_entries); | ||
268 | if (err) | ||
269 | return err; | ||
270 | } else if (!kern_gpfifo) { | ||
271 | /* from userspace to vidmem, use the common path */ | ||
272 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | ||
273 | 0, num_entries); | ||
274 | if (err) | ||
275 | return err; | ||
276 | |||
277 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, | ||
278 | num_entries); | ||
279 | } else { | ||
280 | /* from kernel to either sysmem or vidmem, don't need | ||
281 | * copy_user_gpfifo so use the common path */ | ||
282 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); | ||
283 | } | ||
284 | |||
285 | trace_write_pushbuffers(c, num_entries); | ||
286 | |||
287 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
288 | (c->gpfifo.entry_num - 1); | ||
289 | |||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c, | ||
294 | struct nvgpu_gpfifo_entry *gpfifo, | ||
295 | struct nvgpu_gpfifo_userdata userdata, | ||
296 | u32 num_entries, | ||
297 | u32 flags, | ||
298 | struct nvgpu_channel_fence *fence, | ||
299 | struct gk20a_fence **fence_out, | ||
300 | struct fifo_profile_gk20a *profile) | ||
301 | { | ||
302 | struct gk20a *g = c->g; | ||
303 | struct priv_cmd_entry *wait_cmd = NULL; | ||
304 | struct priv_cmd_entry *incr_cmd = NULL; | ||
305 | struct gk20a_fence *post_fence = NULL; | ||
306 | struct channel_gk20a_job *job = NULL; | ||
307 | /* we might need two extra gpfifo entries - one for pre fence | ||
308 | * and one for post fence. */ | ||
309 | const int extra_entries = 2; | ||
310 | bool skip_buffer_refcounting = (flags & | ||
311 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
312 | int err = 0; | ||
313 | bool need_job_tracking; | ||
314 | bool need_deferred_cleanup = false; | ||
315 | |||
316 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
317 | return -ENODEV; | ||
318 | |||
319 | if (c->has_timedout) | ||
320 | return -ETIMEDOUT; | ||
321 | |||
322 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
323 | return -ENOMEM; | ||
324 | |||
325 | /* fifo not large enough for request. Return error immediately. | ||
326 | * Kernel can insert gpfifo entries before and after user gpfifos. | ||
327 | * So, add extra_entries in user request. Also, HW with fifo size N | ||
328 | * can accept only N-1 entreis and so the below condition */ | ||
329 | if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { | ||
330 | nvgpu_err(g, "not enough gpfifo space allocated"); | ||
331 | return -ENOMEM; | ||
332 | } | ||
333 | |||
334 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
335 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
336 | !fence) | ||
337 | return -EINVAL; | ||
338 | |||
339 | /* an address space needs to have been bound at this point. */ | ||
340 | if (!gk20a_channel_as_bound(c)) { | ||
341 | nvgpu_err(g, | ||
342 | "not bound to an address space at time of gpfifo" | ||
343 | " submission."); | ||
344 | return -EINVAL; | ||
345 | } | ||
346 | |||
347 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
348 | |||
349 | /* update debug settings */ | ||
350 | nvgpu_ltc_sync_enabled(g); | ||
351 | |||
352 | nvgpu_log_info(g, "channel %d", c->chid); | ||
353 | |||
354 | /* | ||
355 | * Job tracking is necessary for any of the following conditions: | ||
356 | * - pre- or post-fence functionality | ||
357 | * - channel wdt | ||
358 | * - GPU rail-gating with non-deterministic channels | ||
359 | * - buffer refcounting | ||
360 | * | ||
361 | * If none of the conditions are met, then job tracking is not | ||
362 | * required and a fast submit can be done (ie. only need to write | ||
363 | * out userspace GPFIFO entries and update GP_PUT). | ||
364 | */ | ||
365 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
366 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
367 | c->timeout.enabled || | ||
368 | (g->can_railgate && !c->deterministic) || | ||
369 | !skip_buffer_refcounting; | ||
370 | |||
371 | if (need_job_tracking) { | ||
372 | bool need_sync_framework = false; | ||
373 | |||
374 | /* | ||
375 | * If the channel is to have deterministic latency and | ||
376 | * job tracking is required, the channel must have | ||
377 | * pre-allocated resources. Otherwise, we fail the submit here | ||
378 | */ | ||
379 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
380 | return -EINVAL; | ||
381 | |||
382 | need_sync_framework = | ||
383 | gk20a_channel_sync_needs_sync_framework(g) || | ||
384 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
385 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
386 | |||
387 | /* | ||
388 | * Deferred clean-up is necessary for any of the following | ||
389 | * conditions: | ||
390 | * - channel's deterministic flag is not set | ||
391 | * - dependency on sync framework, which could make the | ||
392 | * behavior of the clean-up operation non-deterministic | ||
393 | * (should not be performed in the submit path) | ||
394 | * - channel wdt | ||
395 | * - GPU rail-gating with non-deterministic channels | ||
396 | * - buffer refcounting | ||
397 | * | ||
398 | * If none of the conditions are met, then deferred clean-up | ||
399 | * is not required, and we clean-up one job-tracking | ||
400 | * resource in the submit path. | ||
401 | */ | ||
402 | need_deferred_cleanup = !c->deterministic || | ||
403 | need_sync_framework || | ||
404 | c->timeout.enabled || | ||
405 | (g->can_railgate && | ||
406 | !c->deterministic) || | ||
407 | !skip_buffer_refcounting; | ||
408 | |||
409 | /* | ||
410 | * For deterministic channels, we don't allow deferred clean_up | ||
411 | * processing to occur. In cases we hit this, we fail the submit | ||
412 | */ | ||
413 | if (c->deterministic && need_deferred_cleanup) | ||
414 | return -EINVAL; | ||
415 | |||
416 | if (!c->deterministic) { | ||
417 | /* | ||
418 | * Get a power ref unless this is a deterministic | ||
419 | * channel that holds them during the channel lifetime. | ||
420 | * This one is released by gk20a_channel_clean_up_jobs, | ||
421 | * via syncpt or sema interrupt, whichever is used. | ||
422 | */ | ||
423 | err = gk20a_busy(g); | ||
424 | if (err) { | ||
425 | nvgpu_err(g, | ||
426 | "failed to host gk20a to submit gpfifo"); | ||
427 | nvgpu_print_current(g, NULL, NVGPU_ERROR); | ||
428 | return err; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | if (!need_deferred_cleanup) { | ||
433 | /* clean up a single job */ | ||
434 | gk20a_channel_clean_up_jobs(c, false); | ||
435 | } | ||
436 | } | ||
437 | |||
438 | |||
439 | /* Grab access to HW to deal with do_idle */ | ||
440 | if (c->deterministic) | ||
441 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
442 | |||
443 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
444 | /* | ||
445 | * Nope - this channel has dropped its own power ref. As | ||
446 | * deterministic submits don't hold power on per each submitted | ||
447 | * job like normal ones do, the GPU might railgate any time now | ||
448 | * and thus submit is disallowed. | ||
449 | */ | ||
450 | err = -EINVAL; | ||
451 | goto clean_up; | ||
452 | } | ||
453 | |||
454 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
455 | c->chid, | ||
456 | num_entries, | ||
457 | flags, | ||
458 | fence ? fence->id : 0, | ||
459 | fence ? fence->value : 0); | ||
460 | |||
461 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
462 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
463 | |||
464 | /* | ||
465 | * Make sure we have enough space for gpfifo entries. Check cached | ||
466 | * values first and then read from HW. If no space, return EAGAIN | ||
467 | * and let userpace decide to re-try request or not. | ||
468 | */ | ||
469 | if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { | ||
470 | if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { | ||
471 | err = -EAGAIN; | ||
472 | goto clean_up; | ||
473 | } | ||
474 | } | ||
475 | |||
476 | if (c->has_timedout) { | ||
477 | err = -ETIMEDOUT; | ||
478 | goto clean_up; | ||
479 | } | ||
480 | |||
481 | if (need_job_tracking) { | ||
482 | err = channel_gk20a_alloc_job(c, &job); | ||
483 | if (err) | ||
484 | goto clean_up; | ||
485 | |||
486 | err = nvgpu_submit_prepare_syncs(c, fence, job, | ||
487 | &wait_cmd, &incr_cmd, | ||
488 | &post_fence, | ||
489 | need_deferred_cleanup, | ||
490 | flags); | ||
491 | if (err) | ||
492 | goto clean_up_job; | ||
493 | } | ||
494 | |||
495 | gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); | ||
496 | |||
497 | if (wait_cmd) | ||
498 | nvgpu_submit_append_priv_cmdbuf(c, wait_cmd); | ||
499 | |||
500 | err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, | ||
501 | num_entries); | ||
502 | if (err) | ||
503 | goto clean_up_job; | ||
504 | |||
505 | /* | ||
506 | * And here's where we add the incr_cmd we generated earlier. It should | ||
507 | * always run! | ||
508 | */ | ||
509 | if (incr_cmd) | ||
510 | nvgpu_submit_append_priv_cmdbuf(c, incr_cmd); | ||
511 | |||
512 | if (fence_out) | ||
513 | *fence_out = gk20a_fence_get(post_fence); | ||
514 | |||
515 | if (need_job_tracking) | ||
516 | /* TODO! Check for errors... */ | ||
517 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | ||
518 | gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); | ||
519 | |||
520 | g->ops.fifo.userd_gp_put(g, c); | ||
521 | |||
522 | /* No hw access beyond this point */ | ||
523 | if (c->deterministic) | ||
524 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
525 | |||
526 | trace_gk20a_channel_submitted_gpfifo(g->name, | ||
527 | c->chid, | ||
528 | num_entries, | ||
529 | flags, | ||
530 | post_fence ? post_fence->syncpt_id : 0, | ||
531 | post_fence ? post_fence->syncpt_value : 0); | ||
532 | |||
533 | nvgpu_log_info(g, "post-submit put %d, get %d, size %d", | ||
534 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
535 | |||
536 | gk20a_fifo_profile_snapshot(profile, PROFILE_END); | ||
537 | |||
538 | nvgpu_log_fn(g, "done"); | ||
539 | return err; | ||
540 | |||
541 | clean_up_job: | ||
542 | channel_gk20a_free_job(c, job); | ||
543 | clean_up: | ||
544 | nvgpu_log_fn(g, "fail"); | ||
545 | gk20a_fence_put(post_fence); | ||
546 | if (c->deterministic) | ||
547 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
548 | else if (need_deferred_cleanup) | ||
549 | gk20a_idle(g); | ||
550 | |||
551 | return err; | ||
552 | } | ||
553 | |||
554 | int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
555 | struct nvgpu_gpfifo_userdata userdata, | ||
556 | u32 num_entries, | ||
557 | u32 flags, | ||
558 | struct nvgpu_channel_fence *fence, | ||
559 | struct gk20a_fence **fence_out, | ||
560 | struct fifo_profile_gk20a *profile) | ||
561 | { | ||
562 | return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, | ||
563 | flags, fence, fence_out, profile); | ||
564 | } | ||
565 | |||
566 | int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
567 | struct nvgpu_gpfifo_entry *gpfifo, | ||
568 | u32 num_entries, | ||
569 | u32 flags, | ||
570 | struct nvgpu_channel_fence *fence, | ||
571 | struct gk20a_fence **fence_out) | ||
572 | { | ||
573 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; | ||
574 | |||
575 | return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, | ||
576 | flags, fence, fence_out, NULL); | ||
577 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index aa37db62..78325019 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -47,7 +47,7 @@ struct fifo_profile_gk20a; | |||
47 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) | 47 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) |
48 | #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1 << 3) | 48 | #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1 << 3) |
49 | 49 | ||
50 | /* Flags to be passed to gk20a_submit_channel_gpfifo() */ | 50 | /* Flags to be passed to nvgpu_submit_channel_gpfifo() */ |
51 | #define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1 << 0) | 51 | #define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1 << 0) |
52 | #define NVGPU_SUBMIT_FLAGS_FENCE_GET (1 << 1) | 52 | #define NVGPU_SUBMIT_FLAGS_FENCE_GET (1 << 1) |
53 | #define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1 << 2) | 53 | #define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1 << 2) |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 9061236e..3c25f8fb 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -150,6 +150,7 @@ enum nvgpu_unit; | |||
150 | enum nvgpu_flush_op; | 150 | enum nvgpu_flush_op; |
151 | 151 | ||
152 | struct _resmgr_context; | 152 | struct _resmgr_context; |
153 | struct nvgpu_gpfifo_entry; | ||
153 | 154 | ||
154 | struct nvgpu_gpfifo_userdata { | 155 | struct nvgpu_gpfifo_userdata { |
155 | struct nvgpu_gpfifo_entry __user *entries; | 156 | struct nvgpu_gpfifo_entry __user *entries; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h new file mode 100644 index 00000000..604083d4 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/types.h> | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | |||
27 | struct nvgpu_channel_fence; | ||
28 | struct gk20a_fence; | ||
29 | struct fifo_profile_gk20a; | ||
30 | |||
31 | int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
32 | struct nvgpu_gpfifo_userdata userdata, | ||
33 | u32 num_entries, | ||
34 | u32 flags, | ||
35 | struct nvgpu_channel_fence *fence, | ||
36 | struct gk20a_fence **fence_out, | ||
37 | struct fifo_profile_gk20a *profile); | ||
38 | |||
39 | int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
40 | struct nvgpu_gpfifo_entry *gpfifo, | ||
41 | u32 num_entries, | ||
42 | u32 flags, | ||
43 | struct nvgpu_channel_fence *fence, | ||
44 | struct gk20a_fence **fence_out); | ||
45 | |||
46 | #ifdef CONFIG_DEBUG_FS | ||
47 | void trace_write_pushbuffers(struct channel_gk20a *c, int count); | ||
48 | #else | ||
49 | static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count) | ||
50 | { | ||
51 | } | ||
52 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 052a1d21..39b7d1f5 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <nvgpu/bug.h> | 32 | #include <nvgpu/bug.h> |
33 | #include <nvgpu/firmware.h> | 33 | #include <nvgpu/firmware.h> |
34 | #include <nvgpu/os_sched.h> | 34 | #include <nvgpu/os_sched.h> |
35 | #include <nvgpu/channel.h> | ||
35 | 36 | ||
36 | #include <nvgpu/linux/vm.h> | 37 | #include <nvgpu/linux/vm.h> |
37 | 38 | ||
@@ -783,7 +784,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | |||
783 | return -ENOSYS; | 784 | return -ENOSYS; |
784 | } | 785 | } |
785 | 786 | ||
786 | return gk20a_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, | 787 | return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, |
787 | num_entries, flags, fence, fence_out); | 788 | num_entries, flags, fence, fence_out); |
788 | } | 789 | } |
789 | 790 | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c index 8f20091b..0b43c0d1 100644 --- a/drivers/gpu/nvgpu/os/linux/ce2.c +++ b/drivers/gpu/nvgpu/os/linux/ce2.c | |||
@@ -15,6 +15,7 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/types.h> | 17 | #include <nvgpu/types.h> |
18 | #include <nvgpu/channel.h> | ||
18 | 19 | ||
19 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | 20 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> |
20 | 21 | ||
@@ -130,7 +131,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
130 | 131 | ||
131 | nvgpu_smp_wmb(); | 132 | nvgpu_smp_wmb(); |
132 | 133 | ||
133 | ret = gk20a_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, | 134 | ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, |
134 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); | 135 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); |
135 | 136 | ||
136 | if (!ret) { | 137 | if (!ret) { |
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c index 391950af..fef44f2b 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.c +++ b/drivers/gpu/nvgpu/os/linux/channel.c | |||
@@ -16,7 +16,6 @@ | |||
16 | 16 | ||
17 | #include <nvgpu/enabled.h> | 17 | #include <nvgpu/enabled.h> |
18 | #include <nvgpu/debug.h> | 18 | #include <nvgpu/debug.h> |
19 | #include <nvgpu/ltc.h> | ||
20 | #include <nvgpu/error_notifier.h> | 19 | #include <nvgpu/error_notifier.h> |
21 | #include <nvgpu/os_sched.h> | 20 | #include <nvgpu/os_sched.h> |
22 | 21 | ||
@@ -489,11 +488,9 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, | |||
489 | dma_buf_vunmap(dmabuf, mem); | 488 | dma_buf_vunmap(dmabuf, mem); |
490 | } | 489 | } |
491 | } | 490 | } |
492 | #endif | ||
493 | 491 | ||
494 | static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | 492 | void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) |
495 | { | 493 | { |
496 | #ifdef CONFIG_DEBUG_FS | ||
497 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; | 494 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; |
498 | u32 n = c->gpfifo.entry_num; | 495 | u32 n = c->gpfifo.entry_num; |
499 | u32 start = c->gpfifo.put; | 496 | u32 start = c->gpfifo.put; |
@@ -507,549 +504,5 @@ static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | |||
507 | 504 | ||
508 | for (i = 0; i < count; i++) | 505 | for (i = 0; i < count; i++) |
509 | trace_write_pushbuffer(c, &gp[(start + i) % n]); | 506 | trace_write_pushbuffer(c, &gp[(start + i) % n]); |
510 | #endif | ||
511 | } | ||
512 | |||
513 | /* | ||
514 | * Handle the submit synchronization - pre-fences and post-fences. | ||
515 | */ | ||
516 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | ||
517 | struct nvgpu_channel_fence *fence, | ||
518 | struct channel_gk20a_job *job, | ||
519 | struct priv_cmd_entry **wait_cmd, | ||
520 | struct priv_cmd_entry **incr_cmd, | ||
521 | struct gk20a_fence **post_fence, | ||
522 | bool register_irq, | ||
523 | u32 flags) | ||
524 | { | ||
525 | struct gk20a *g = c->g; | ||
526 | bool need_sync_fence = false; | ||
527 | bool new_sync_created = false; | ||
528 | int wait_fence_fd = -1; | ||
529 | int err = 0; | ||
530 | bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); | ||
531 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
532 | |||
533 | if (g->aggressive_sync_destroy_thresh) { | ||
534 | nvgpu_mutex_acquire(&c->sync_lock); | ||
535 | if (!c->sync) { | ||
536 | c->sync = gk20a_channel_sync_create(c, false); | ||
537 | if (!c->sync) { | ||
538 | err = -ENOMEM; | ||
539 | nvgpu_mutex_release(&c->sync_lock); | ||
540 | goto fail; | ||
541 | } | ||
542 | new_sync_created = true; | ||
543 | } | ||
544 | nvgpu_atomic_inc(&c->sync->refcount); | ||
545 | nvgpu_mutex_release(&c->sync_lock); | ||
546 | } | ||
547 | |||
548 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | ||
549 | err = g->ops.fifo.resetup_ramfc(c); | ||
550 | if (err) | ||
551 | goto fail; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * Optionally insert syncpt/semaphore wait in the beginning of gpfifo | ||
556 | * submission when user requested and the wait hasn't expired. | ||
557 | */ | ||
558 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { | ||
559 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
560 | |||
561 | if (!pre_alloc_enabled) | ||
562 | job->wait_cmd = nvgpu_kzalloc(g, | ||
563 | sizeof(struct priv_cmd_entry)); | ||
564 | |||
565 | if (!job->wait_cmd) { | ||
566 | err = -ENOMEM; | ||
567 | goto fail; | ||
568 | } | ||
569 | |||
570 | if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
571 | wait_fence_fd = fence->id; | ||
572 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | ||
573 | job->wait_cmd, max_wait_cmds); | ||
574 | } else { | ||
575 | err = c->sync->wait_syncpt(c->sync, fence->id, | ||
576 | fence->value, | ||
577 | job->wait_cmd); | ||
578 | } | ||
579 | |||
580 | if (err) | ||
581 | goto clean_up_wait_cmd; | ||
582 | |||
583 | if (job->wait_cmd->valid) | ||
584 | *wait_cmd = job->wait_cmd; | ||
585 | } | ||
586 | |||
587 | if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && | ||
588 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) | ||
589 | need_sync_fence = true; | ||
590 | |||
591 | /* | ||
592 | * Always generate an increment at the end of a GPFIFO submission. This | ||
593 | * is used to keep track of method completion for idle railgating. The | ||
594 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | ||
595 | */ | ||
596 | job->post_fence = gk20a_alloc_fence(c); | ||
597 | if (!job->post_fence) { | ||
598 | err = -ENOMEM; | ||
599 | goto clean_up_wait_cmd; | ||
600 | } | ||
601 | if (!pre_alloc_enabled) | ||
602 | job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); | ||
603 | |||
604 | if (!job->incr_cmd) { | ||
605 | err = -ENOMEM; | ||
606 | goto clean_up_post_fence; | ||
607 | } | ||
608 | |||
609 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
610 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | ||
611 | job->post_fence, need_wfi, need_sync_fence, | ||
612 | register_irq); | ||
613 | else | ||
614 | err = c->sync->incr(c->sync, job->incr_cmd, | ||
615 | job->post_fence, need_sync_fence, | ||
616 | register_irq); | ||
617 | if (!err) { | ||
618 | *incr_cmd = job->incr_cmd; | ||
619 | *post_fence = job->post_fence; | ||
620 | } else | ||
621 | goto clean_up_incr_cmd; | ||
622 | |||
623 | return 0; | ||
624 | |||
625 | clean_up_incr_cmd: | ||
626 | free_priv_cmdbuf(c, job->incr_cmd); | ||
627 | if (!pre_alloc_enabled) | ||
628 | job->incr_cmd = NULL; | ||
629 | clean_up_post_fence: | ||
630 | gk20a_fence_put(job->post_fence); | ||
631 | job->post_fence = NULL; | ||
632 | clean_up_wait_cmd: | ||
633 | if (job->wait_cmd) | ||
634 | free_priv_cmdbuf(c, job->wait_cmd); | ||
635 | if (!pre_alloc_enabled) | ||
636 | job->wait_cmd = NULL; | ||
637 | fail: | ||
638 | *wait_cmd = NULL; | ||
639 | return err; | ||
640 | } | ||
641 | |||
642 | static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
643 | struct priv_cmd_entry *cmd) | ||
644 | { | ||
645 | struct gk20a *g = c->g; | ||
646 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
647 | struct nvgpu_gpfifo_entry x = { | ||
648 | .entry0 = u64_lo32(cmd->gva), | ||
649 | .entry1 = u64_hi32(cmd->gva) | | ||
650 | pbdma_gp_entry1_length_f(cmd->size) | ||
651 | }; | ||
652 | |||
653 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
654 | &x, sizeof(x)); | ||
655 | |||
656 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
657 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
658 | (u32 *)cmd->mem->cpu_va + cmd->off); | ||
659 | |||
660 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
661 | } | ||
662 | |||
663 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, | ||
664 | struct nvgpu_gpfifo_userdata userdata, | ||
665 | u32 num_entries) | ||
666 | { | ||
667 | struct gk20a *g = c->g; | ||
668 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
669 | u32 gpfifo_size = c->gpfifo.entry_num; | ||
670 | u32 len = num_entries; | ||
671 | u32 start = c->gpfifo.put; | ||
672 | u32 end = start + len; /* exclusive */ | ||
673 | int err; | ||
674 | |||
675 | if (end > gpfifo_size) { | ||
676 | /* wrap-around */ | ||
677 | int length0 = gpfifo_size - start; | ||
678 | int length1 = len - length0; | ||
679 | |||
680 | err = g->os_channel.copy_user_gpfifo( | ||
681 | gpfifo_cpu + start, userdata, | ||
682 | 0, length0); | ||
683 | if (err) | ||
684 | return err; | ||
685 | |||
686 | err = g->os_channel.copy_user_gpfifo( | ||
687 | gpfifo_cpu, userdata, | ||
688 | length0, length1); | ||
689 | if (err) | ||
690 | return err; | ||
691 | } else { | ||
692 | err = g->os_channel.copy_user_gpfifo( | ||
693 | gpfifo_cpu + start, userdata, | ||
694 | 0, len); | ||
695 | if (err) | ||
696 | return err; | ||
697 | } | ||
698 | |||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c, | ||
703 | struct nvgpu_gpfifo_entry *src, u32 num_entries) | ||
704 | { | ||
705 | struct gk20a *g = c->g; | ||
706 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
707 | /* in bytes */ | ||
708 | u32 gpfifo_size = | ||
709 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
710 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
711 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
712 | u32 end = start + len; /* exclusive */ | ||
713 | |||
714 | if (end > gpfifo_size) { | ||
715 | /* wrap-around */ | ||
716 | int length0 = gpfifo_size - start; | ||
717 | int length1 = len - length0; | ||
718 | struct nvgpu_gpfifo_entry *src2 = src + length0; | ||
719 | |||
720 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); | ||
721 | nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); | ||
722 | } else { | ||
723 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); | ||
724 | } | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
729 | * splitting into two memcpys to handle wrap-around. | ||
730 | */ | ||
731 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
732 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
733 | struct nvgpu_gpfifo_userdata userdata, | ||
734 | u32 num_entries) | ||
735 | { | ||
736 | struct gk20a *g = c->g; | ||
737 | int err; | ||
738 | |||
739 | if (!kern_gpfifo && !c->gpfifo.pipe) { | ||
740 | /* | ||
741 | * This path (from userspace to sysmem) is special in order to | ||
742 | * avoid two copies unnecessarily (from user to pipe, then from | ||
743 | * pipe to gpu sysmem buffer). | ||
744 | */ | ||
745 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, | ||
746 | num_entries); | ||
747 | if (err) | ||
748 | return err; | ||
749 | } else if (!kern_gpfifo) { | ||
750 | /* from userspace to vidmem, use the common path */ | ||
751 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | ||
752 | 0, num_entries); | ||
753 | if (err) | ||
754 | return err; | ||
755 | |||
756 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, | ||
757 | num_entries); | ||
758 | } else { | ||
759 | /* from kernel to either sysmem or vidmem, don't need | ||
760 | * copy_user_gpfifo so use the common path */ | ||
761 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); | ||
762 | } | ||
763 | |||
764 | trace_write_pushbuffers(c, num_entries); | ||
765 | |||
766 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
767 | (c->gpfifo.entry_num - 1); | ||
768 | |||
769 | return 0; | ||
770 | } | ||
771 | |||
772 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | ||
773 | struct nvgpu_gpfifo_entry *gpfifo, | ||
774 | struct nvgpu_gpfifo_userdata userdata, | ||
775 | u32 num_entries, | ||
776 | u32 flags, | ||
777 | struct nvgpu_channel_fence *fence, | ||
778 | struct gk20a_fence **fence_out, | ||
779 | struct fifo_profile_gk20a *profile) | ||
780 | { | ||
781 | struct gk20a *g = c->g; | ||
782 | struct priv_cmd_entry *wait_cmd = NULL; | ||
783 | struct priv_cmd_entry *incr_cmd = NULL; | ||
784 | struct gk20a_fence *post_fence = NULL; | ||
785 | struct channel_gk20a_job *job = NULL; | ||
786 | /* we might need two extra gpfifo entries - one for pre fence | ||
787 | * and one for post fence. */ | ||
788 | const int extra_entries = 2; | ||
789 | bool skip_buffer_refcounting = (flags & | ||
790 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
791 | int err = 0; | ||
792 | bool need_job_tracking; | ||
793 | bool need_deferred_cleanup = false; | ||
794 | |||
795 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
796 | return -ENODEV; | ||
797 | |||
798 | if (c->has_timedout) | ||
799 | return -ETIMEDOUT; | ||
800 | |||
801 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
802 | return -ENOMEM; | ||
803 | |||
804 | /* fifo not large enough for request. Return error immediately. | ||
805 | * Kernel can insert gpfifo entries before and after user gpfifos. | ||
806 | * So, add extra_entries in user request. Also, HW with fifo size N | ||
807 | * can accept only N-1 entries and so the below condition */ | ||
808 | if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { | ||
809 | nvgpu_err(g, "not enough gpfifo space allocated"); | ||
810 | return -ENOMEM; | ||
811 | } | ||
812 | |||
813 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
814 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
815 | !fence) | ||
816 | return -EINVAL; | ||
817 | |||
818 | /* an address space needs to have been bound at this point. */ | ||
819 | if (!gk20a_channel_as_bound(c)) { | ||
820 | nvgpu_err(g, | ||
821 | "not bound to an address space at time of gpfifo" | ||
822 | " submission."); | ||
823 | return -EINVAL; | ||
824 | } | ||
825 | |||
826 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
827 | |||
828 | /* update debug settings */ | ||
829 | nvgpu_ltc_sync_enabled(g); | ||
830 | |||
831 | nvgpu_log_info(g, "channel %d", c->chid); | ||
832 | |||
833 | /* | ||
834 | * Job tracking is necessary for any of the following conditions: | ||
835 | * - pre- or post-fence functionality | ||
836 | * - channel wdt | ||
837 | * - GPU rail-gating with non-deterministic channels | ||
838 | * - buffer refcounting | ||
839 | * | ||
840 | * If none of the conditions are met, then job tracking is not | ||
841 | * required and a fast submit can be done (ie. only need to write | ||
842 | * out userspace GPFIFO entries and update GP_PUT). | ||
843 | */ | ||
844 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
845 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
846 | c->timeout.enabled || | ||
847 | (g->can_railgate && !c->deterministic) || | ||
848 | !skip_buffer_refcounting; | ||
849 | |||
850 | if (need_job_tracking) { | ||
851 | bool need_sync_framework = false; | ||
852 | |||
853 | /* | ||
854 | * If the channel is to have deterministic latency and | ||
855 | * job tracking is required, the channel must have | ||
856 | * pre-allocated resources. Otherwise, we fail the submit here | ||
857 | */ | ||
858 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
859 | return -EINVAL; | ||
860 | |||
861 | need_sync_framework = | ||
862 | gk20a_channel_sync_needs_sync_framework(g) || | ||
863 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
864 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
865 | |||
866 | /* | ||
867 | * Deferred clean-up is necessary for any of the following | ||
868 | * conditions: | ||
869 | * - channel's deterministic flag is not set | ||
870 | * - dependency on sync framework, which could make the | ||
871 | * behavior of the clean-up operation non-deterministic | ||
872 | * (should not be performed in the submit path) | ||
873 | * - channel wdt | ||
874 | * - GPU rail-gating with non-deterministic channels | ||
875 | * - buffer refcounting | ||
876 | * | ||
877 | * If none of the conditions are met, then deferred clean-up | ||
878 | * is not required, and we clean-up one job-tracking | ||
879 | * resource in the submit path. | ||
880 | */ | ||
881 | need_deferred_cleanup = !c->deterministic || | ||
882 | need_sync_framework || | ||
883 | c->timeout.enabled || | ||
884 | (g->can_railgate && | ||
885 | !c->deterministic) || | ||
886 | !skip_buffer_refcounting; | ||
887 | |||
888 | /* | ||
889 | * For deterministic channels, we don't allow deferred clean_up | ||
890 | * processing to occur. In cases we hit this, we fail the submit | ||
891 | */ | ||
892 | if (c->deterministic && need_deferred_cleanup) | ||
893 | return -EINVAL; | ||
894 | |||
895 | if (!c->deterministic) { | ||
896 | /* | ||
897 | * Get a power ref unless this is a deterministic | ||
898 | * channel that holds them during the channel lifetime. | ||
899 | * This one is released by gk20a_channel_clean_up_jobs, | ||
900 | * via syncpt or sema interrupt, whichever is used. | ||
901 | */ | ||
902 | err = gk20a_busy(g); | ||
903 | if (err) { | ||
904 | nvgpu_err(g, | ||
905 | "failed to host gk20a to submit gpfifo"); | ||
906 | nvgpu_print_current(g, NULL, NVGPU_ERROR); | ||
907 | return err; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | if (!need_deferred_cleanup) { | ||
912 | /* clean up a single job */ | ||
913 | gk20a_channel_clean_up_jobs(c, false); | ||
914 | } | ||
915 | } | ||
916 | |||
917 | |||
918 | /* Grab access to HW to deal with do_idle */ | ||
919 | if (c->deterministic) | ||
920 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
921 | |||
922 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
923 | /* | ||
924 | * Nope - this channel has dropped its own power ref. As | ||
925 | * deterministic submits don't hold power on per each submitted | ||
926 | * job like normal ones do, the GPU might railgate any time now | ||
927 | * and thus submit is disallowed. | ||
928 | */ | ||
929 | err = -EINVAL; | ||
930 | goto clean_up; | ||
931 | } | ||
932 | |||
933 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
934 | c->chid, | ||
935 | num_entries, | ||
936 | flags, | ||
937 | fence ? fence->id : 0, | ||
938 | fence ? fence->value : 0); | ||
939 | |||
940 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
941 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
942 | |||
943 | /* | ||
944 | * Make sure we have enough space for gpfifo entries. Check cached | ||
945 | * values first and then read from HW. If no space, return EAGAIN | ||
946 | * and let userspace decide to re-try request or not. | ||
947 | */ | ||
948 | if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { | ||
949 | if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { | ||
950 | err = -EAGAIN; | ||
951 | goto clean_up; | ||
952 | } | ||
953 | } | ||
954 | |||
955 | if (c->has_timedout) { | ||
956 | err = -ETIMEDOUT; | ||
957 | goto clean_up; | ||
958 | } | ||
959 | |||
960 | if (need_job_tracking) { | ||
961 | err = channel_gk20a_alloc_job(c, &job); | ||
962 | if (err) | ||
963 | goto clean_up; | ||
964 | |||
965 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
966 | &wait_cmd, &incr_cmd, | ||
967 | &post_fence, | ||
968 | need_deferred_cleanup, | ||
969 | flags); | ||
970 | if (err) | ||
971 | goto clean_up_job; | ||
972 | } | ||
973 | |||
974 | gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); | ||
975 | |||
976 | if (wait_cmd) | ||
977 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | ||
978 | |||
979 | err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, | ||
980 | num_entries); | ||
981 | if (err) | ||
982 | goto clean_up_job; | ||
983 | |||
984 | /* | ||
985 | * And here's where we add the incr_cmd we generated earlier. It should | ||
986 | * always run! | ||
987 | */ | ||
988 | if (incr_cmd) | ||
989 | gk20a_submit_append_priv_cmdbuf(c, incr_cmd); | ||
990 | |||
991 | if (fence_out) | ||
992 | *fence_out = gk20a_fence_get(post_fence); | ||
993 | |||
994 | if (need_job_tracking) | ||
995 | /* TODO! Check for errors... */ | ||
996 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | ||
997 | gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); | ||
998 | |||
999 | g->ops.fifo.userd_gp_put(g, c); | ||
1000 | |||
1001 | /* No hw access beyond this point */ | ||
1002 | if (c->deterministic) | ||
1003 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1004 | |||
1005 | trace_gk20a_channel_submitted_gpfifo(g->name, | ||
1006 | c->chid, | ||
1007 | num_entries, | ||
1008 | flags, | ||
1009 | post_fence ? post_fence->syncpt_id : 0, | ||
1010 | post_fence ? post_fence->syncpt_value : 0); | ||
1011 | |||
1012 | nvgpu_log_info(g, "post-submit put %d, get %d, size %d", | ||
1013 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1014 | |||
1015 | gk20a_fifo_profile_snapshot(profile, PROFILE_END); | ||
1016 | |||
1017 | nvgpu_log_fn(g, "done"); | ||
1018 | return err; | ||
1019 | |||
1020 | clean_up_job: | ||
1021 | channel_gk20a_free_job(c, job); | ||
1022 | clean_up: | ||
1023 | nvgpu_log_fn(g, "fail"); | ||
1024 | gk20a_fence_put(post_fence); | ||
1025 | if (c->deterministic) | ||
1026 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1027 | else if (need_deferred_cleanup) | ||
1028 | gk20a_idle(g); | ||
1029 | |||
1030 | return err; | ||
1031 | } | ||
1032 | |||
1033 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
1034 | struct nvgpu_gpfifo_userdata userdata, | ||
1035 | u32 num_entries, | ||
1036 | u32 flags, | ||
1037 | struct nvgpu_channel_fence *fence, | ||
1038 | struct gk20a_fence **fence_out, | ||
1039 | struct fifo_profile_gk20a *profile) | ||
1040 | { | ||
1041 | return gk20a_submit_channel_gpfifo(c, NULL, userdata, num_entries, | ||
1042 | flags, fence, fence_out, profile); | ||
1043 | } | ||
1044 | |||
1045 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
1046 | struct nvgpu_gpfifo_entry *gpfifo, | ||
1047 | u32 num_entries, | ||
1048 | u32 flags, | ||
1049 | struct nvgpu_channel_fence *fence, | ||
1050 | struct gk20a_fence **fence_out) | ||
1051 | { | ||
1052 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; | ||
1053 | return gk20a_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, | ||
1054 | flags, fence, fence_out, NULL); | ||
1055 | } | 507 | } |
508 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h index 43fa492b..87231a79 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.h +++ b/drivers/gpu/nvgpu/os/linux/channel.h | |||
@@ -84,19 +84,4 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | |||
84 | int runlist_id, | 84 | int runlist_id, |
85 | bool is_privileged_channel); | 85 | bool is_privileged_channel); |
86 | 86 | ||
87 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
88 | struct nvgpu_gpfifo_userdata userdata, | ||
89 | u32 num_entries, | ||
90 | u32 flags, | ||
91 | struct nvgpu_channel_fence *fence, | ||
92 | struct gk20a_fence **fence_out, | ||
93 | struct fifo_profile_gk20a *profile); | ||
94 | |||
95 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
96 | struct nvgpu_gpfifo_entry *gpfifo, | ||
97 | u32 num_entries, | ||
98 | u32 flags, | ||
99 | struct nvgpu_channel_fence *fence, | ||
100 | struct gk20a_fence **fence_out); | ||
101 | |||
102 | #endif /* __NVGPU_CHANNEL_H__ */ | 87 | #endif /* __NVGPU_CHANNEL_H__ */ |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index fa6a02d6..7b003b76 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <nvgpu/barrier.h> | 34 | #include <nvgpu/barrier.h> |
35 | #include <nvgpu/nvhost.h> | 35 | #include <nvgpu/nvhost.h> |
36 | #include <nvgpu/os_sched.h> | 36 | #include <nvgpu/os_sched.h> |
37 | #include <nvgpu/channel.h> | ||
37 | 38 | ||
38 | #include "gk20a/gk20a.h" | 39 | #include "gk20a/gk20a.h" |
39 | #include "gk20a/dbg_gpu_gk20a.h" | 40 | #include "gk20a/dbg_gpu_gk20a.h" |
@@ -799,11 +800,11 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
799 | return fd; | 800 | return fd; |
800 | } | 801 | } |
801 | 802 | ||
802 | userdata.entries = (struct nvgpu_gpfifo_entry __user*) | 803 | userdata.entries = (struct nvgpu_gpfifo_entry __user *) |
803 | (uintptr_t)args->gpfifo; | 804 | (uintptr_t)args->gpfifo; |
804 | userdata.context = NULL; | 805 | userdata.context = NULL; |
805 | 806 | ||
806 | ret = gk20a_submit_channel_gpfifo_user(ch, | 807 | ret = nvgpu_submit_channel_gpfifo_user(ch, |
807 | userdata, args->num_entries, | 808 | userdata, args->num_entries, |
808 | submit_flags, &fence, &fence_out, profile); | 809 | submit_flags, &fence, &fence_out, profile); |
809 | 810 | ||
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index ef51451a..8c73ac7e 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h | |||
@@ -646,4 +646,8 @@ DEFINE_EVENT(gk20a_cde, gk20a_cde_finished_ctx_cb, | |||
646 | #define trace_gk20a_ltc_cbc_ctrl_start(arg...) ((void)(NULL)) | 646 | #define trace_gk20a_ltc_cbc_ctrl_start(arg...) ((void)(NULL)) |
647 | #define trace_gk20a_ltc_cbc_ctrl_done(arg...) ((void)(NULL)) | 647 | #define trace_gk20a_ltc_cbc_ctrl_done(arg...) ((void)(NULL)) |
648 | 648 | ||
649 | #define trace_gk20a_channel_submit_gpfifo(arg...) ((void)(NULL)) | ||
650 | #define trace_gk20a_channel_submitted_gpfifo(arg...) ((void)(NULL)) | ||
651 | #define trace_gk20a_push_cmdbuf(arg...) ((void)(NULL)) | ||
652 | |||
649 | #endif | 653 | #endif |