author      Konsta Holtta <kholtta@nvidia.com>                   2018-08-21 05:27:07 -0400
committer   mobile promotions <svcmobile_promotions@nvidia.com>  2018-08-24 17:57:38 -0400
commit      0c387d76dcc7e665255200ba8d98b9abb11cb4a1 (patch)
tree        a41f3dc117a8b4981ba0dc0e89efc9818d54ff09 /drivers/gpu/nvgpu/common/fifo
parent      f062cc5b24554f6ae67abbe846e6d6e8c15c4ffc (diff)
gpu: nvgpu: move channel code to common
Do a simple rename of channel_gk20a.c to common/fifo/channel.c. Header
cleanup and the like will soon follow. Also rename the os-specific files
to have unique names across directories because tmake requires that.
Jira NVGPU-967
Change-Id: I302bbbbe29735264e832378d444a176a4023e3e1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1804608
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/fifo')
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/channel.c | 2262
1 file changed, 2262 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
new file mode 100644
index 00000000..5966e191
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -0,0 +1,2262 @@
1 | /* | ||
2 | * GK20A Graphics channel | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | |||
27 | #include <nvgpu/semaphore.h> | ||
28 | #include <nvgpu/timers.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/dma.h> | ||
31 | #include <nvgpu/log.h> | ||
32 | #include <nvgpu/atomic.h> | ||
33 | #include <nvgpu/bug.h> | ||
34 | #include <nvgpu/list.h> | ||
35 | #include <nvgpu/circ_buf.h> | ||
36 | #include <nvgpu/cond.h> | ||
37 | #include <nvgpu/enabled.h> | ||
38 | #include <nvgpu/debug.h> | ||
39 | #include <nvgpu/ltc.h> | ||
40 | #include <nvgpu/barrier.h> | ||
41 | #include <nvgpu/ctxsw_trace.h> | ||
42 | #include <nvgpu/error_notifier.h> | ||
43 | #include <nvgpu/os_sched.h> | ||
44 | #include <nvgpu/log2.h> | ||
45 | #include <nvgpu/ptimer.h> | ||
46 | |||
47 | #include "gk20a/gk20a.h" | ||
48 | #include "gk20a/dbg_gpu_gk20a.h" | ||
49 | #include "gk20a/fence_gk20a.h" | ||
50 | |||
51 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); | ||
52 | static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c); | ||
53 | |||
54 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); | ||
55 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | ||
56 | |||
57 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); | ||
58 | |||
59 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
60 | struct channel_gk20a_job *job); | ||
61 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
62 | struct channel_gk20a_job *job); | ||
63 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
64 | struct channel_gk20a *c); | ||
65 | |||
66 | /* allocate GPU channel */ | ||
67 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | ||
68 | { | ||
69 | struct channel_gk20a *ch = NULL; | ||
70 | struct gk20a *g = f->g; | ||
71 | |||
72 | nvgpu_mutex_acquire(&f->free_chs_mutex); | ||
73 | if (!nvgpu_list_empty(&f->free_chs)) { | ||
74 | ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, | ||
75 | free_chs); | ||
76 | nvgpu_list_del(&ch->free_chs); | ||
77 | WARN_ON(nvgpu_atomic_read(&ch->ref_count)); | ||
78 | WARN_ON(ch->referenceable); | ||
79 | f->used_channels++; | ||
80 | } | ||
81 | nvgpu_mutex_release(&f->free_chs_mutex); | ||
82 | |||
83 | if (g->aggressive_sync_destroy_thresh && | ||
84 | (f->used_channels > | ||
85 | g->aggressive_sync_destroy_thresh)) | ||
86 | g->aggressive_sync_destroy = true; | ||
87 | |||
88 | return ch; | ||
89 | } | ||
90 | |||
91 | static void free_channel(struct fifo_gk20a *f, | ||
92 | struct channel_gk20a *ch) | ||
93 | { | ||
94 | struct gk20a *g = f->g; | ||
95 | |||
96 | trace_gk20a_release_used_channel(ch->chid); | ||
97 | /* refcount is zero here and channel is in a freed/dead state */ | ||
98 | nvgpu_mutex_acquire(&f->free_chs_mutex); | ||
99 | /* add to head to increase visibility of timing-related bugs */ | ||
100 | nvgpu_list_add(&ch->free_chs, &f->free_chs); | ||
101 | f->used_channels--; | ||
102 | nvgpu_mutex_release(&f->free_chs_mutex); | ||
103 | |||
104 | /* | ||
105 | * On teardown it is not possible to dereference platform, but ignoring | ||
106 | * this is fine because no new channels would be created. | ||
107 | */ | ||
108 | if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { | ||
109 | if (g->aggressive_sync_destroy_thresh && | ||
110 | (f->used_channels < | ||
111 | g->aggressive_sync_destroy_thresh)) | ||
112 | g->aggressive_sync_destroy = false; | ||
113 | } | ||
114 | } | ||
115 | |||
116 | int channel_gk20a_commit_va(struct channel_gk20a *c) | ||
117 | { | ||
118 | struct gk20a *g = c->g; | ||
119 | |||
120 | nvgpu_log_fn(g, " "); | ||
121 | |||
122 | g->ops.mm.init_inst_block(&c->inst_block, c->vm, | ||
123 | c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]); | ||
124 | |||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | ||
129 | unsigned int timeslice_period, | ||
130 | unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale) | ||
131 | { | ||
132 | unsigned int value = scale_ptimer(timeslice_period, | ||
133 | ptimer_scalingfactor10x(g->ptimer_src_freq)); | ||
134 | unsigned int shift = 0; | ||
135 | |||
136 | /* value field is 8 bits long */ | ||
137 | while (value >= 1 << 8) { | ||
138 | value >>= 1; | ||
139 | shift++; | ||
140 | } | ||
141 | |||
142 | /* time slice register is only 18 bits long */ | ||
143 | if ((value << shift) >= 1<<19) { | ||
144 | nvgpu_err(g, "Requested timeslice value is clamped to 18 bits\n"); | ||
145 | value = 255; | ||
146 | shift = 10; | ||
147 | } | ||
148 | |||
149 | *__timeslice_timeout = value; | ||
150 | *__timeslice_scale = shift; | ||
151 | |||
152 | return 0; | ||
153 | } | ||
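/*
 * Illustrative example (hypothetical numbers, not taken from the change
 * itself): if the scaled timeslice value comes out as 3000, it does not
 * fit in the 8-bit value field, so the loop above halves it until it
 * does: 3000 -> 1500 -> 750 -> 375 -> 187 with shift = 4. The returned
 * pair (timeout = 187, scale = 4) then stands for roughly
 * 187 << 4 = 2992, close to the requested 3000.
 */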
154 | |||
155 | int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) | ||
156 | { | ||
157 | return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true); | ||
158 | } | ||
159 | |||
160 | int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) | ||
161 | { | ||
162 | struct tsg_gk20a *tsg; | ||
163 | |||
164 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
165 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
166 | g->ops.fifo.enable_tsg(tsg); | ||
167 | } else { | ||
168 | g->ops.fifo.enable_channel(ch); | ||
169 | } | ||
170 | |||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) | ||
175 | { | ||
176 | struct tsg_gk20a *tsg; | ||
177 | |||
178 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
179 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
180 | g->ops.fifo.disable_tsg(tsg); | ||
181 | } else { | ||
182 | g->ops.fifo.disable_channel(ch); | ||
183 | } | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | ||
189 | { | ||
190 | /* synchronize with actual job cleanup */ | ||
191 | nvgpu_mutex_acquire(&ch->joblist.cleanup_lock); | ||
192 | |||
193 | /* ensure no fences are pending */ | ||
194 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
195 | if (ch->sync) | ||
196 | ch->sync->set_min_eq_max(ch->sync); | ||
197 | if (ch->user_sync) | ||
198 | ch->user_sync->set_safe_state(ch->user_sync); | ||
199 | nvgpu_mutex_release(&ch->sync_lock); | ||
200 | |||
201 | nvgpu_mutex_release(&ch->joblist.cleanup_lock); | ||
202 | |||
203 | /* | ||
204 | * When closing the channel, this scheduled update holds one ref which | ||
205 | * is waited for before advancing with freeing. | ||
206 | */ | ||
207 | gk20a_channel_update(ch); | ||
208 | } | ||
209 | |||
210 | void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) | ||
211 | { | ||
212 | nvgpu_log_fn(ch->g, " "); | ||
213 | |||
214 | if (gk20a_is_channel_marked_as_tsg(ch)) | ||
215 | return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt); | ||
216 | |||
217 | /* make sure new kickoffs are prevented */ | ||
218 | ch->has_timedout = true; | ||
219 | |||
220 | ch->g->ops.fifo.disable_channel(ch); | ||
221 | |||
222 | if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch)) | ||
223 | ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); | ||
224 | |||
225 | if (ch->g->ops.fifo.ch_abort_clean_up) | ||
226 | ch->g->ops.fifo.ch_abort_clean_up(ch); | ||
227 | } | ||
228 | |||
229 | int gk20a_wait_channel_idle(struct channel_gk20a *ch) | ||
230 | { | ||
231 | bool channel_idle = false; | ||
232 | struct nvgpu_timeout timeout; | ||
233 | |||
234 | nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g), | ||
235 | NVGPU_TIMER_CPU_TIMER); | ||
236 | |||
237 | do { | ||
238 | channel_gk20a_joblist_lock(ch); | ||
239 | channel_idle = channel_gk20a_joblist_is_empty(ch); | ||
240 | channel_gk20a_joblist_unlock(ch); | ||
241 | if (channel_idle) | ||
242 | break; | ||
243 | |||
244 | nvgpu_usleep_range(1000, 3000); | ||
245 | } while (!nvgpu_timeout_expired(&timeout)); | ||
246 | |||
247 | if (!channel_idle) { | ||
248 | nvgpu_err(ch->g, "jobs not freed for channel %d", | ||
249 | ch->chid); | ||
250 | return -EBUSY; | ||
251 | } | ||
252 | |||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | void gk20a_disable_channel(struct channel_gk20a *ch) | ||
257 | { | ||
258 | gk20a_channel_abort(ch, true); | ||
259 | channel_gk20a_update_runlist(ch, false); | ||
260 | } | ||
261 | |||
262 | void gk20a_wait_until_counter_is_N( | ||
263 | struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, | ||
264 | struct nvgpu_cond *c, const char *caller, const char *counter_name) | ||
265 | { | ||
266 | while (true) { | ||
267 | if (NVGPU_COND_WAIT( | ||
268 | c, | ||
269 | nvgpu_atomic_read(counter) == wait_value, | ||
270 | 5000) == 0) | ||
271 | break; | ||
272 | |||
273 | nvgpu_warn(ch->g, | ||
274 | "%s: channel %d, still waiting, %s left: %d, waiting for: %d", | ||
275 | caller, ch->chid, counter_name, | ||
276 | nvgpu_atomic_read(counter), wait_value); | ||
277 | |||
278 | gk20a_channel_dump_ref_actions(ch); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | /* call ONLY when no references to the channel exist: after the last put */ | ||
283 | static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | ||
284 | { | ||
285 | struct gk20a *g = ch->g; | ||
286 | struct fifo_gk20a *f = &g->fifo; | ||
287 | struct gr_gk20a *gr = &g->gr; | ||
288 | struct vm_gk20a *ch_vm = ch->vm; | ||
289 | unsigned long timeout = gk20a_get_gr_idle_timeout(g); | ||
290 | struct dbg_session_gk20a *dbg_s; | ||
291 | struct dbg_session_data *session_data, *tmp_s; | ||
292 | struct dbg_session_channel_data *ch_data, *tmp; | ||
293 | int err; | ||
294 | |||
295 | nvgpu_log_fn(g, " "); | ||
296 | |||
297 | WARN_ON(ch->g == NULL); | ||
298 | |||
299 | trace_gk20a_free_channel(ch->chid); | ||
300 | |||
301 | if (g->os_channel.close) | ||
302 | g->os_channel.close(ch); | ||
303 | |||
304 | /* | ||
305 | * Disable channel/TSG and unbind here. This should not be executed if | ||
306 | * HW access is not available during shutdown/removal path as it will | ||
307 | * trigger a timeout | ||
308 | */ | ||
309 | if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { | ||
310 | /* abort channel and remove from runlist */ | ||
311 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
312 | err = gk20a_tsg_unbind_channel(ch); | ||
313 | if (err) | ||
314 | nvgpu_err(g, | ||
315 | "failed to unbind channel %d from TSG", | ||
316 | ch->chid); | ||
317 | } else { | ||
318 | /* | ||
319 | * Channel is already unbound from TSG by User with | ||
320 | * explicit call | ||
321 | * Nothing to do here in that case | ||
322 | */ | ||
323 | } | ||
324 | } | ||
325 | /* wait until there's only our ref to the channel */ | ||
326 | if (!force) | ||
327 | gk20a_wait_until_counter_is_N( | ||
328 | ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, | ||
329 | __func__, "references"); | ||
330 | |||
331 | /* wait until all pending interrupts for recently completed | ||
332 | * jobs are handled */ | ||
333 | nvgpu_wait_for_deferred_interrupts(g); | ||
334 | |||
335 | /* prevent new refs */ | ||
336 | nvgpu_spinlock_acquire(&ch->ref_obtain_lock); | ||
337 | if (!ch->referenceable) { | ||
338 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
339 | nvgpu_err(ch->g, | ||
340 | "Extra %s() called to channel %u", | ||
341 | __func__, ch->chid); | ||
342 | return; | ||
343 | } | ||
344 | ch->referenceable = false; | ||
345 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
346 | |||
347 | /* matches with the initial reference in gk20a_open_new_channel() */ | ||
348 | nvgpu_atomic_dec(&ch->ref_count); | ||
349 | |||
350 | /* wait until no more refs to the channel */ | ||
351 | if (!force) | ||
352 | gk20a_wait_until_counter_is_N( | ||
353 | ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, | ||
354 | __func__, "references"); | ||
355 | |||
356 | /* if engine reset was deferred, perform it now */ | ||
357 | nvgpu_mutex_acquire(&f->deferred_reset_mutex); | ||
358 | if (g->fifo.deferred_reset_pending) { | ||
359 | nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" | ||
360 | " deferred, running now"); | ||
361 | /* if lock is already taken, a reset is taking place | ||
362 | so no need to repeat */ | ||
363 | if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { | ||
364 | gk20a_fifo_deferred_reset(g, ch); | ||
365 | nvgpu_mutex_release(&g->fifo.gr_reset_mutex); | ||
366 | } | ||
367 | } | ||
368 | nvgpu_mutex_release(&f->deferred_reset_mutex); | ||
369 | |||
370 | if (!gk20a_channel_as_bound(ch)) | ||
371 | goto unbind; | ||
372 | |||
373 | nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", | ||
374 | timeout); | ||
375 | |||
376 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
377 | if (g->ops.fecs_trace.unbind_channel && !ch->vpr) | ||
378 | g->ops.fecs_trace.unbind_channel(g, ch); | ||
379 | #endif | ||
380 | |||
381 | if (g->ops.fifo.free_channel_ctx_header) | ||
382 | g->ops.fifo.free_channel_ctx_header(ch); | ||
383 | |||
384 | if (ch->usermode_submit_enabled) { | ||
385 | gk20a_channel_free_usermode_buffers(ch); | ||
386 | ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) + | ||
387 | ch->chid * f->userd_entry_size; | ||
388 | ch->usermode_submit_enabled = false; | ||
389 | } | ||
390 | |||
391 | gk20a_gr_flush_channel_tlb(gr); | ||
392 | |||
393 | nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem); | ||
394 | nvgpu_big_free(g, ch->gpfifo.pipe); | ||
395 | memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); | ||
396 | |||
397 | channel_gk20a_free_priv_cmdbuf(ch); | ||
398 | |||
399 | /* sync must be destroyed before releasing channel vm */ | ||
400 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
401 | if (ch->sync) { | ||
402 | gk20a_channel_sync_destroy(ch->sync, false); | ||
403 | ch->sync = NULL; | ||
404 | } | ||
405 | if (ch->user_sync) { | ||
406 | /* | ||
407 | * Set user managed syncpoint to safe state | ||
408 | * But it's already done if the channel has timed out | ||
409 | */ | ||
410 | if (ch->has_timedout) | ||
411 | gk20a_channel_sync_destroy(ch->user_sync, false); | ||
412 | else | ||
413 | gk20a_channel_sync_destroy(ch->user_sync, true); | ||
414 | ch->user_sync = NULL; | ||
415 | } | ||
416 | nvgpu_mutex_release(&ch->sync_lock); | ||
417 | |||
418 | /* | ||
419 | * free the channel used semaphore index. | ||
420 | * we need to do this before releasing the address space, | ||
421 | * as the semaphore pool might get freed after that point. | ||
422 | */ | ||
423 | if (ch->hw_sema) | ||
424 | nvgpu_semaphore_free_hw_sema(ch); | ||
425 | |||
426 | /* | ||
427 | * When releasing the channel we unbind the VM - so release the ref. | ||
428 | */ | ||
429 | nvgpu_vm_put(ch_vm); | ||
430 | |||
431 | /* make sure we don't have deferred interrupts pending that | ||
432 | * could still touch the channel */ | ||
433 | nvgpu_wait_for_deferred_interrupts(g); | ||
434 | |||
435 | unbind: | ||
436 | g->ops.fifo.unbind_channel(ch); | ||
437 | g->ops.fifo.free_inst(g, ch); | ||
438 | |||
439 | /* put back the channel-wide submit ref from init */ | ||
440 | if (ch->deterministic) { | ||
441 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
442 | ch->deterministic = false; | ||
443 | if (!ch->deterministic_railgate_allowed) | ||
444 | gk20a_idle(g); | ||
445 | ch->deterministic_railgate_allowed = false; | ||
446 | |||
447 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
448 | } | ||
449 | |||
450 | ch->vpr = false; | ||
451 | ch->vm = NULL; | ||
452 | |||
453 | WARN_ON(ch->sync); | ||
454 | |||
455 | /* unlink all debug sessions */ | ||
456 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
457 | |||
458 | nvgpu_list_for_each_entry_safe(session_data, tmp_s, | ||
459 | &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { | ||
460 | dbg_s = session_data->dbg_s; | ||
461 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
462 | nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, | ||
463 | dbg_session_channel_data, ch_entry) { | ||
464 | if (ch_data->chid == ch->chid) | ||
465 | ch_data->unbind_single_channel(dbg_s, ch_data); | ||
466 | } | ||
467 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
468 | } | ||
469 | |||
470 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
471 | |||
472 | /* free pre-allocated resources, if applicable */ | ||
473 | if (channel_gk20a_is_prealloc_enabled(ch)) | ||
474 | channel_gk20a_free_prealloc_resources(ch); | ||
475 | |||
476 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
477 | memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); | ||
478 | ch->ref_actions_put = 0; | ||
479 | #endif | ||
480 | |||
481 | /* make sure we catch accesses of unopened channels in case | ||
482 | * there are non-refcounted channel pointers hanging around */ | ||
483 | ch->g = NULL; | ||
484 | nvgpu_smp_wmb(); | ||
485 | |||
486 | /* ALWAYS last */ | ||
487 | free_channel(f, ch); | ||
488 | } | ||
489 | |||
490 | static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) | ||
491 | { | ||
492 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
493 | size_t i, get; | ||
494 | s64 now = nvgpu_current_time_ms(); | ||
495 | s64 prev = 0; | ||
496 | struct gk20a *g = ch->g; | ||
497 | |||
498 | nvgpu_spinlock_acquire(&ch->ref_actions_lock); | ||
499 | |||
500 | nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", | ||
501 | ch->chid, nvgpu_atomic_read(&ch->ref_count)); | ||
502 | |||
503 | /* start at the oldest possible entry. put is next insertion point */ | ||
504 | get = ch->ref_actions_put; | ||
505 | |||
506 | /* | ||
507 | * If the buffer is not full, this will first loop to the oldest entry, | ||
508 | * skipping not-yet-initialized entries. There is no ref_actions_get. | ||
509 | */ | ||
510 | for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { | ||
511 | struct channel_gk20a_ref_action *act = &ch->ref_actions[get]; | ||
512 | |||
513 | if (act->trace.nr_entries) { | ||
514 | nvgpu_info(g, | ||
515 | "%s ref %zu steps ago (age %lld ms, diff %lld ms)", | ||
516 | act->type == channel_gk20a_ref_action_get | ||
517 | ? "GET" : "PUT", | ||
518 | GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, | ||
519 | now - act->timestamp_ms, | ||
520 | act->timestamp_ms - prev); | ||
521 | |||
522 | print_stack_trace(&act->trace, 0); | ||
523 | prev = act->timestamp_ms; | ||
524 | } | ||
525 | |||
526 | get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; | ||
527 | } | ||
528 | |||
529 | nvgpu_spinlock_release(&ch->ref_actions_lock); | ||
530 | #endif | ||
531 | } | ||
532 | |||
533 | static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, | ||
534 | enum channel_gk20a_ref_action_type type) | ||
535 | { | ||
536 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
537 | struct channel_gk20a_ref_action *act; | ||
538 | |||
539 | nvgpu_spinlock_acquire(&ch->ref_actions_lock); | ||
540 | |||
541 | act = &ch->ref_actions[ch->ref_actions_put]; | ||
542 | act->type = type; | ||
543 | act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; | ||
544 | act->trace.nr_entries = 0; | ||
545 | act->trace.skip = 3; /* onwards from the caller of this */ | ||
546 | act->trace.entries = act->trace_entries; | ||
547 | save_stack_trace(&act->trace); | ||
548 | act->timestamp_ms = nvgpu_current_time_ms(); | ||
549 | ch->ref_actions_put = (ch->ref_actions_put + 1) % | ||
550 | GK20A_CHANNEL_REFCOUNT_TRACKING; | ||
551 | |||
552 | nvgpu_spinlock_release(&ch->ref_actions_lock); | ||
553 | #endif | ||
554 | } | ||
555 | |||
556 | /* Try to get a reference to the channel. Return nonzero on success. If it fails, | ||
557 | * the channel is dead or being freed elsewhere and you must not touch it. | ||
558 | * | ||
559 | * Whenever a channel_gk20a pointer is seen and about to be used, a | ||
560 | * reference must be held to it - either by you or the caller, which should be | ||
561 | * documented well or otherwise clearly seen. This usually boils down to the | ||
562 | * file from ioctls directly, or an explicit get in exception handlers when the | ||
563 | * channel is found by a chid. | ||
564 | * | ||
565 | * Most global functions in this file require a reference to be held by the | ||
566 | * caller. | ||
567 | */ | ||
568 | struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, | ||
569 | const char *caller) { | ||
570 | struct channel_gk20a *ret; | ||
571 | |||
572 | nvgpu_spinlock_acquire(&ch->ref_obtain_lock); | ||
573 | |||
574 | if (likely(ch->referenceable)) { | ||
575 | gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); | ||
576 | nvgpu_atomic_inc(&ch->ref_count); | ||
577 | ret = ch; | ||
578 | } else | ||
579 | ret = NULL; | ||
580 | |||
581 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
582 | |||
583 | if (ret) | ||
584 | trace_gk20a_channel_get(ch->chid, caller); | ||
585 | |||
586 | return ret; | ||
587 | } | ||
588 | |||
589 | void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) | ||
590 | { | ||
591 | gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); | ||
592 | trace_gk20a_channel_put(ch->chid, caller); | ||
593 | nvgpu_atomic_dec(&ch->ref_count); | ||
594 | nvgpu_cond_broadcast(&ch->ref_count_dec_wq); | ||
595 | |||
596 | /* More puts than gets. Channel is probably going to get | ||
597 | * stuck. */ | ||
598 | WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); | ||
599 | |||
600 | /* Also, more puts than gets. ref_count can go to 0 only if | ||
601 | * the channel is closing. Channel is probably going to get | ||
602 | * stuck. */ | ||
603 | WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); | ||
604 | } | ||
605 | |||
606 | void gk20a_channel_close(struct channel_gk20a *ch) | ||
607 | { | ||
608 | gk20a_free_channel(ch, false); | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Be careful with this - it is meant for terminating channels when we know the | ||
613 | * driver is otherwise dying. Ref counts and the like are ignored by this | ||
614 | * version of the cleanup. | ||
615 | */ | ||
616 | void __gk20a_channel_kill(struct channel_gk20a *ch) | ||
617 | { | ||
618 | gk20a_free_channel(ch, true); | ||
619 | } | ||
620 | |||
621 | struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, | ||
622 | s32 runlist_id, | ||
623 | bool is_privileged_channel, | ||
624 | pid_t pid, pid_t tid) | ||
625 | { | ||
626 | struct fifo_gk20a *f = &g->fifo; | ||
627 | struct channel_gk20a *ch; | ||
628 | |||
629 | /* compatibility with existing code */ | ||
630 | if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) { | ||
631 | runlist_id = gk20a_fifo_get_gr_runlist_id(g); | ||
632 | } | ||
633 | |||
634 | nvgpu_log_fn(g, " "); | ||
635 | |||
636 | ch = allocate_channel(f); | ||
637 | if (ch == NULL) { | ||
638 | /* TBD: we want to make this virtualizable */ | ||
639 | nvgpu_err(g, "out of hw chids"); | ||
640 | return NULL; | ||
641 | } | ||
642 | |||
643 | trace_gk20a_open_new_channel(ch->chid); | ||
644 | |||
645 | BUG_ON(ch->g); | ||
646 | ch->g = g; | ||
647 | |||
648 | /* Runlist for the channel */ | ||
649 | ch->runlist_id = runlist_id; | ||
650 | |||
651 | /* Channel privilege level */ | ||
652 | ch->is_privileged_channel = is_privileged_channel; | ||
653 | |||
654 | ch->pid = tid; | ||
655 | ch->tgid = pid; /* process granularity for FECS traces */ | ||
656 | |||
657 | if (g->ops.fifo.alloc_inst(g, ch)) { | ||
658 | ch->g = NULL; | ||
659 | free_channel(f, ch); | ||
660 | nvgpu_err(g, | ||
661 | "failed to open gk20a channel, out of inst mem"); | ||
662 | return NULL; | ||
663 | } | ||
664 | |||
665 | /* now the channel is in limbo out of the free list but not marked as | ||
666 | * alive and used (i.e. get-able) yet */ | ||
667 | |||
668 | /* By default, channel is regular (non-TSG) channel */ | ||
669 | ch->tsgid = NVGPU_INVALID_TSG_ID; | ||
670 | |||
671 | /* clear ctxsw timeout counter and update timestamp */ | ||
672 | ch->timeout_accumulated_ms = 0; | ||
673 | ch->timeout_gpfifo_get = 0; | ||
674 | /* set gr host default timeout */ | ||
675 | ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); | ||
676 | ch->timeout_debug_dump = true; | ||
677 | ch->has_timedout = false; | ||
678 | |||
679 | /* init kernel watchdog timeout */ | ||
680 | ch->timeout.enabled = true; | ||
681 | ch->timeout.limit_ms = g->ch_wdt_timeout_ms; | ||
682 | ch->timeout.debug_dump = true; | ||
683 | |||
684 | ch->obj_class = 0; | ||
685 | ch->subctx_id = 0; | ||
686 | ch->runqueue_sel = 0; | ||
687 | |||
688 | ch->mmu_nack_handled = false; | ||
689 | |||
690 | /* The channel is *not* runnable at this point. It still needs to have | ||
691 | * an address space bound and allocate a gpfifo and grctx. */ | ||
692 | |||
693 | nvgpu_cond_init(&ch->notifier_wq); | ||
694 | nvgpu_cond_init(&ch->semaphore_wq); | ||
695 | |||
696 | if (g->os_channel.open) | ||
697 | g->os_channel.open(ch); | ||
698 | |||
699 | /* Mark the channel alive, get-able, with one initial use | ||
700 | * reference. The initial reference will be decreased in | ||
701 | * gk20a_free_channel() */ | ||
702 | ch->referenceable = true; | ||
703 | nvgpu_atomic_set(&ch->ref_count, 1); | ||
704 | nvgpu_smp_wmb(); | ||
705 | |||
706 | return ch; | ||
707 | } | ||
708 | |||
709 | /* allocate private cmd buffer. | ||
710 | used for inserting commands before/after user submitted buffers. */ | ||
711 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) | ||
712 | { | ||
713 | struct gk20a *g = c->g; | ||
714 | struct vm_gk20a *ch_vm = c->vm; | ||
715 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
716 | u32 size; | ||
717 | int err = 0; | ||
718 | |||
719 | /* | ||
720 | * Compute the amount of priv_cmdbuf space we need. In general the worst | ||
721 | * case is the kernel inserts both a semaphore pre-fence and post-fence. | ||
722 | * Any sync-pt fences will take less memory so we can ignore them for | ||
723 | * now. | ||
724 | * | ||
725 | * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b, | ||
726 | * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10 | ||
727 | * dwords: all the same as an ACQ plus a non-stalling intr which is | ||
728 | * another 2 dwords. | ||
729 | * | ||
730 | * Lastly the number of gpfifo entries per channel is fixed so at most | ||
731 | * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one | ||
732 | * userspace entry, and one post-fence entry). Thus the computation is: | ||
733 | * | ||
734 | * (gpfifo entry number) * (2 / 3) * (8 + 10) * 4 bytes. | ||
735 | */ | ||
736 | size = roundup_pow_of_two(c->gpfifo.entry_num * | ||
737 | 2 * 18 * sizeof(u32) / 3); | ||
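/*
 * Worked example with an assumed gpfifo size (not part of this change):
 * for 1024 gpfifo entries, 1024 * 2 * 18 * 4 / 3 = 49152 bytes, which
 * roundup_pow_of_two() turns into a 65536-byte (64 KiB) buffer; if the
 * allocation is exactly that size, q->size below becomes 16384 words.
 */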
738 | |||
739 | err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); | ||
740 | if (err) { | ||
741 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
742 | goto clean_up; | ||
743 | } | ||
744 | |||
745 | q->size = q->mem.size / sizeof(u32); | ||
746 | |||
747 | return 0; | ||
748 | |||
749 | clean_up: | ||
750 | channel_gk20a_free_priv_cmdbuf(c); | ||
751 | return err; | ||
752 | } | ||
753 | |||
754 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) | ||
755 | { | ||
756 | struct vm_gk20a *ch_vm = c->vm; | ||
757 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
758 | |||
759 | if (q->size == 0) | ||
760 | return; | ||
761 | |||
762 | nvgpu_dma_unmap_free(ch_vm, &q->mem); | ||
763 | |||
764 | memset(q, 0, sizeof(struct priv_cmd_queue)); | ||
765 | } | ||
766 | |||
767 | /* allocate a cmd buffer with given size. size is number of u32 entries */ | ||
768 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | ||
769 | struct priv_cmd_entry *e) | ||
770 | { | ||
771 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
772 | u32 free_count; | ||
773 | u32 size = orig_size; | ||
774 | |||
775 | nvgpu_log_fn(c->g, "size %d", orig_size); | ||
776 | |||
777 | if (!e) { | ||
778 | nvgpu_err(c->g, | ||
779 | "ch %d: priv cmd entry is null", | ||
780 | c->chid); | ||
781 | return -EINVAL; | ||
782 | } | ||
783 | |||
784 | /* if free space at the end is less than requested, increase the size | ||
785 | * to make the real allocated space start from the beginning. */ | ||
786 | if (q->put + size > q->size) | ||
787 | size = orig_size + (q->size - q->put); | ||
788 | |||
789 | nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", | ||
790 | c->chid, q->get, q->put); | ||
791 | |||
792 | free_count = (q->size - (q->put - q->get) - 1) % q->size; | ||
793 | |||
794 | if (size > free_count) | ||
795 | return -EAGAIN; | ||
796 | |||
797 | e->size = orig_size; | ||
798 | e->mem = &q->mem; | ||
799 | |||
800 | /* if we have increased size to skip free space at the end, set put | ||
801 | to the beginning of the cmd buffer (0) + size */ | ||
802 | if (size != orig_size) { | ||
803 | e->off = 0; | ||
804 | e->gva = q->mem.gpu_va; | ||
805 | q->put = orig_size; | ||
806 | } else { | ||
807 | e->off = q->put; | ||
808 | e->gva = q->mem.gpu_va + q->put * sizeof(u32); | ||
809 | q->put = (q->put + orig_size) & (q->size - 1); | ||
810 | } | ||
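/*
 * Wrap-around illustration with hypothetical queue state: with
 * q->size = 1024, q->put = 1000 and q->get = 100, a request for
 * orig_size = 50 does not fit in the 24 words left at the end, so size
 * becomes 50 + 24 = 74. free_count = (1024 - (1000 - 100) - 1) % 1024
 * = 123, so the request is accepted, the entry is placed at offset 0
 * and q->put becomes 50.
 */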
811 | |||
812 | /* we already handled q->put + size > q->size so BUG_ON this */ | ||
813 | BUG_ON(q->put > q->size); | ||
814 | |||
815 | /* | ||
816 | * commit the previous writes before making the entry valid. | ||
817 | * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). | ||
818 | */ | ||
819 | nvgpu_smp_wmb(); | ||
820 | |||
821 | e->valid = true; | ||
822 | nvgpu_log_fn(c->g, "done"); | ||
823 | |||
824 | return 0; | ||
825 | } | ||
826 | |||
827 | /* Don't call this to free an explicit cmd entry. | ||
828 | * It doesn't update priv_cmd_queue get/put */ | ||
829 | void free_priv_cmdbuf(struct channel_gk20a *c, | ||
830 | struct priv_cmd_entry *e) | ||
831 | { | ||
832 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
833 | memset(e, 0, sizeof(struct priv_cmd_entry)); | ||
834 | else | ||
835 | nvgpu_kfree(c->g, e); | ||
836 | } | ||
837 | |||
838 | int channel_gk20a_alloc_job(struct channel_gk20a *c, | ||
839 | struct channel_gk20a_job **job_out) | ||
840 | { | ||
841 | int err = 0; | ||
842 | |||
843 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
844 | int put = c->joblist.pre_alloc.put; | ||
845 | int get = c->joblist.pre_alloc.get; | ||
846 | |||
847 | /* | ||
848 | * ensure all subsequent reads happen after reading get. | ||
849 | * see corresponding nvgpu_smp_wmb in | ||
850 | * gk20a_channel_clean_up_jobs() | ||
851 | */ | ||
852 | nvgpu_smp_rmb(); | ||
853 | |||
854 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | ||
855 | *job_out = &c->joblist.pre_alloc.jobs[put]; | ||
856 | else { | ||
857 | nvgpu_warn(c->g, | ||
858 | "out of job ringbuffer space"); | ||
859 | err = -EAGAIN; | ||
860 | } | ||
861 | } else { | ||
862 | *job_out = nvgpu_kzalloc(c->g, | ||
863 | sizeof(struct channel_gk20a_job)); | ||
864 | if (!*job_out) | ||
865 | err = -ENOMEM; | ||
866 | } | ||
867 | |||
868 | return err; | ||
869 | } | ||
870 | |||
871 | void channel_gk20a_free_job(struct channel_gk20a *c, | ||
872 | struct channel_gk20a_job *job) | ||
873 | { | ||
874 | /* | ||
875 | * In case of pre_allocated jobs, we need to clean out | ||
876 | * the job but maintain the pointers to the priv_cmd_entry, | ||
877 | * since they're inherently tied to the job node. | ||
878 | */ | ||
879 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
880 | struct priv_cmd_entry *wait_cmd = job->wait_cmd; | ||
881 | struct priv_cmd_entry *incr_cmd = job->incr_cmd; | ||
882 | memset(job, 0, sizeof(*job)); | ||
883 | job->wait_cmd = wait_cmd; | ||
884 | job->incr_cmd = incr_cmd; | ||
885 | } else | ||
886 | nvgpu_kfree(c->g, job); | ||
887 | } | ||
888 | |||
889 | void channel_gk20a_joblist_lock(struct channel_gk20a *c) | ||
890 | { | ||
891 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
892 | nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); | ||
893 | else | ||
894 | nvgpu_spinlock_acquire(&c->joblist.dynamic.lock); | ||
895 | } | ||
896 | |||
897 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c) | ||
898 | { | ||
899 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
900 | nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); | ||
901 | else | ||
902 | nvgpu_spinlock_release(&c->joblist.dynamic.lock); | ||
903 | } | ||
904 | |||
905 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
906 | struct channel_gk20a *c) | ||
907 | { | ||
908 | int get; | ||
909 | struct channel_gk20a_job *job = NULL; | ||
910 | |||
911 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
912 | if (!channel_gk20a_joblist_is_empty(c)) { | ||
913 | get = c->joblist.pre_alloc.get; | ||
914 | job = &c->joblist.pre_alloc.jobs[get]; | ||
915 | } | ||
916 | } else { | ||
917 | if (!nvgpu_list_empty(&c->joblist.dynamic.jobs)) | ||
918 | job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs, | ||
919 | channel_gk20a_job, list); | ||
920 | } | ||
921 | |||
922 | return job; | ||
923 | } | ||
924 | |||
925 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
926 | struct channel_gk20a_job *job) | ||
927 | { | ||
928 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
929 | c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % | ||
930 | (c->joblist.pre_alloc.length); | ||
931 | } else { | ||
932 | nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs); | ||
933 | } | ||
934 | } | ||
935 | |||
936 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
937 | struct channel_gk20a_job *job) | ||
938 | { | ||
939 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
940 | c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % | ||
941 | (c->joblist.pre_alloc.length); | ||
942 | } else { | ||
943 | nvgpu_list_del(&job->list); | ||
944 | } | ||
945 | } | ||
946 | |||
947 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) | ||
948 | { | ||
949 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
950 | int get = c->joblist.pre_alloc.get; | ||
951 | int put = c->joblist.pre_alloc.put; | ||
952 | return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); | ||
953 | } | ||
954 | |||
955 | return nvgpu_list_empty(&c->joblist.dynamic.jobs); | ||
956 | } | ||
957 | |||
958 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | ||
959 | { | ||
960 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | ||
961 | |||
962 | nvgpu_smp_rmb(); | ||
963 | return pre_alloc_enabled; | ||
964 | } | ||
965 | |||
966 | static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | ||
967 | unsigned int num_jobs) | ||
968 | { | ||
969 | unsigned int i; | ||
970 | int err; | ||
971 | size_t size; | ||
972 | struct priv_cmd_entry *entries = NULL; | ||
973 | |||
974 | if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) | ||
975 | return -EINVAL; | ||
976 | |||
977 | /* | ||
978 | * pre-allocate the job list. | ||
979 | * since vmalloc takes an unsigned long, we need | ||
980 | * to make sure we don't hit an overflow condition | ||
981 | */ | ||
982 | size = sizeof(struct channel_gk20a_job); | ||
983 | if (num_jobs <= ULONG_MAX / size) | ||
984 | c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g, | ||
985 | num_jobs * size); | ||
986 | if (!c->joblist.pre_alloc.jobs) { | ||
987 | err = -ENOMEM; | ||
988 | goto clean_up; | ||
989 | } | ||
990 | |||
991 | /* | ||
992 | * pre-allocate 2x priv_cmd_entry for each job up front. | ||
993 | * since vmalloc takes an unsigned long, we need | ||
994 | * to make sure we don't hit an overflow condition | ||
995 | */ | ||
996 | size = sizeof(struct priv_cmd_entry); | ||
997 | if (num_jobs <= ULONG_MAX / (size << 1)) | ||
998 | entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size); | ||
999 | if (!entries) { | ||
1000 | err = -ENOMEM; | ||
1001 | goto clean_up_joblist; | ||
1002 | } | ||
1003 | |||
1004 | for (i = 0; i < num_jobs; i++) { | ||
1005 | c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; | ||
1006 | c->joblist.pre_alloc.jobs[i].incr_cmd = | ||
1007 | &entries[i + num_jobs]; | ||
1008 | } | ||
1009 | |||
1010 | /* pre-allocate a fence pool */ | ||
1011 | err = gk20a_alloc_fence_pool(c, num_jobs); | ||
1012 | if (err) | ||
1013 | goto clean_up_priv_cmd; | ||
1014 | |||
1015 | c->joblist.pre_alloc.length = num_jobs; | ||
1016 | c->joblist.pre_alloc.put = 0; | ||
1017 | c->joblist.pre_alloc.get = 0; | ||
1018 | |||
1019 | /* | ||
1020 | * commit the previous writes before setting the flag. | ||
1021 | * see corresponding nvgpu_smp_rmb in | ||
1022 | * channel_gk20a_is_prealloc_enabled() | ||
1023 | */ | ||
1024 | nvgpu_smp_wmb(); | ||
1025 | c->joblist.pre_alloc.enabled = true; | ||
1026 | |||
1027 | return 0; | ||
1028 | |||
1029 | clean_up_priv_cmd: | ||
1030 | nvgpu_vfree(c->g, entries); | ||
1031 | clean_up_joblist: | ||
1032 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); | ||
1033 | clean_up: | ||
1034 | memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); | ||
1035 | return err; | ||
1036 | } | ||
1037 | |||
1038 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | ||
1039 | { | ||
1040 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd); | ||
1041 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); | ||
1042 | gk20a_free_fence_pool(c); | ||
1043 | |||
1044 | /* | ||
1045 | * commit the previous writes before disabling the flag. | ||
1046 | * see corresponding nvgpu_smp_rmb in | ||
1047 | * channel_gk20a_is_prealloc_enabled() | ||
1048 | */ | ||
1049 | nvgpu_smp_wmb(); | ||
1050 | c->joblist.pre_alloc.enabled = false; | ||
1051 | } | ||
1052 | |||
1053 | int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, | ||
1054 | struct nvgpu_gpfifo_args *gpfifo_args) | ||
1055 | { | ||
1056 | struct gk20a *g = c->g; | ||
1057 | struct vm_gk20a *ch_vm; | ||
1058 | u32 gpfifo_size, gpfifo_entry_size; | ||
1059 | int err = 0; | ||
1060 | unsigned long acquire_timeout; | ||
1061 | |||
1062 | gpfifo_size = gpfifo_args->num_entries; | ||
1063 | gpfifo_entry_size = nvgpu_get_gpfifo_entry_size(); | ||
1064 | |||
1065 | if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR) | ||
1066 | c->vpr = true; | ||
1067 | |||
1068 | if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) { | ||
1069 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1070 | /* | ||
1071 | * Railgating isn't deterministic; instead of disallowing | ||
1072 | * railgating globally, take a power refcount for this | ||
1073 | * channel's lifetime. The gk20a_idle() pair for this happens | ||
1074 | * when the channel gets freed. | ||
1075 | * | ||
1076 | * Deterministic flag and this busy must be atomic within the | ||
1077 | * busy lock. | ||
1078 | */ | ||
1079 | err = gk20a_busy(g); | ||
1080 | if (err) { | ||
1081 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1082 | return err; | ||
1083 | } | ||
1084 | |||
1085 | c->deterministic = true; | ||
1086 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1087 | } | ||
1088 | |||
1089 | /* an address space needs to have been bound at this point. */ | ||
1090 | if (!gk20a_channel_as_bound(c)) { | ||
1091 | nvgpu_err(g, | ||
1092 | "not bound to an address space at time of gpfifo" | ||
1093 | " allocation."); | ||
1094 | err = -EINVAL; | ||
1095 | goto clean_up_idle; | ||
1096 | } | ||
1097 | ch_vm = c->vm; | ||
1098 | |||
1099 | if (c->gpfifo.mem.size) { | ||
1100 | nvgpu_err(g, "channel %d :" | ||
1101 | "gpfifo already allocated", c->chid); | ||
1102 | err = -EEXIST; | ||
1103 | goto clean_up_idle; | ||
1104 | } | ||
1105 | |||
1106 | if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) { | ||
1107 | if (g->ops.fifo.alloc_usermode_buffers) { | ||
1108 | err = g->ops.fifo.alloc_usermode_buffers(c, | ||
1109 | gpfifo_args); | ||
1110 | if (err) { | ||
1111 | nvgpu_err(g, "Usermode buffer alloc failed"); | ||
1112 | goto clean_up; | ||
1113 | } | ||
1114 | c->userd_iova = nvgpu_mem_get_addr(g, | ||
1115 | &c->usermode_userd); | ||
1116 | c->usermode_submit_enabled = true; | ||
1117 | } else { | ||
1118 | nvgpu_err(g, "Usermode submit not supported"); | ||
1119 | err = -EINVAL; | ||
1120 | goto clean_up; | ||
1121 | } | ||
1122 | } | ||
1123 | |||
1124 | err = nvgpu_dma_alloc_map_sys(ch_vm, | ||
1125 | gpfifo_size * gpfifo_entry_size, | ||
1126 | &c->gpfifo.mem); | ||
1127 | if (err) { | ||
1128 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
1129 | goto clean_up_usermode; | ||
1130 | } | ||
1131 | |||
1132 | if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) { | ||
1133 | c->gpfifo.pipe = nvgpu_big_malloc(g, | ||
1134 | gpfifo_size * gpfifo_entry_size); | ||
1135 | if (!c->gpfifo.pipe) { | ||
1136 | err = -ENOMEM; | ||
1137 | goto clean_up_unmap; | ||
1138 | } | ||
1139 | } | ||
1140 | |||
1141 | c->gpfifo.entry_num = gpfifo_size; | ||
1142 | c->gpfifo.get = c->gpfifo.put = 0; | ||
1143 | |||
1144 | nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", | ||
1145 | c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); | ||
1146 | |||
1147 | g->ops.fifo.setup_userd(c); | ||
1148 | |||
1149 | if (!g->aggressive_sync_destroy_thresh) { | ||
1150 | nvgpu_mutex_acquire(&c->sync_lock); | ||
1151 | c->sync = gk20a_channel_sync_create(c, false); | ||
1152 | if (!c->sync) { | ||
1153 | err = -ENOMEM; | ||
1154 | nvgpu_mutex_release(&c->sync_lock); | ||
1155 | goto clean_up_unmap; | ||
1156 | } | ||
1157 | nvgpu_mutex_release(&c->sync_lock); | ||
1158 | |||
1159 | if (g->ops.fifo.resetup_ramfc) { | ||
1160 | err = g->ops.fifo.resetup_ramfc(c); | ||
1161 | if (err) | ||
1162 | goto clean_up_sync; | ||
1163 | } | ||
1164 | } | ||
1165 | |||
1166 | if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled) | ||
1167 | acquire_timeout = 0; | ||
1168 | else | ||
1169 | acquire_timeout = c->timeout.limit_ms; | ||
1170 | |||
1171 | err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, | ||
1172 | c->gpfifo.entry_num, | ||
1173 | acquire_timeout, gpfifo_args->flags); | ||
1174 | if (err) | ||
1175 | goto clean_up_sync; | ||
1176 | |||
1177 | /* TBD: setup engine contexts */ | ||
1178 | |||
1179 | if (gpfifo_args->num_inflight_jobs) { | ||
1180 | err = channel_gk20a_prealloc_resources(c, | ||
1181 | gpfifo_args->num_inflight_jobs); | ||
1182 | if (err) | ||
1183 | goto clean_up_sync; | ||
1184 | } | ||
1185 | |||
1186 | err = channel_gk20a_alloc_priv_cmdbuf(c); | ||
1187 | if (err) | ||
1188 | goto clean_up_prealloc; | ||
1189 | |||
1190 | err = channel_gk20a_update_runlist(c, true); | ||
1191 | if (err) | ||
1192 | goto clean_up_priv_cmd; | ||
1193 | |||
1194 | g->ops.fifo.bind_channel(c); | ||
1195 | |||
1196 | nvgpu_log_fn(g, "done"); | ||
1197 | return 0; | ||
1198 | |||
1199 | clean_up_priv_cmd: | ||
1200 | channel_gk20a_free_priv_cmdbuf(c); | ||
1201 | clean_up_prealloc: | ||
1202 | if (gpfifo_args->num_inflight_jobs) | ||
1203 | channel_gk20a_free_prealloc_resources(c); | ||
1204 | clean_up_sync: | ||
1205 | if (c->sync) { | ||
1206 | gk20a_channel_sync_destroy(c->sync, false); | ||
1207 | c->sync = NULL; | ||
1208 | } | ||
1209 | clean_up_unmap: | ||
1210 | nvgpu_big_free(g, c->gpfifo.pipe); | ||
1211 | nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); | ||
1212 | clean_up_usermode: | ||
1213 | if (c->usermode_submit_enabled) { | ||
1214 | gk20a_channel_free_usermode_buffers(c); | ||
1215 | c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) + | ||
1216 | c->chid * g->fifo.userd_entry_size; | ||
1217 | c->usermode_submit_enabled = false; | ||
1218 | } | ||
1219 | clean_up: | ||
1220 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); | ||
1221 | clean_up_idle: | ||
1222 | if (c->deterministic) { | ||
1223 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1224 | gk20a_idle(g); | ||
1225 | c->deterministic = false; | ||
1226 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1227 | } | ||
1228 | nvgpu_err(g, "fail"); | ||
1229 | return err; | ||
1230 | } | ||
1231 | |||
1232 | void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c) | ||
1233 | { | ||
1234 | if (nvgpu_mem_is_valid(&c->usermode_userd)) | ||
1235 | nvgpu_dma_free(c->g, &c->usermode_userd); | ||
1236 | } | ||
1237 | |||
1238 | /* Call this periodically to determine how the gpfifo is draining. */ | ||
1239 | static inline u32 update_gp_get(struct gk20a *g, | ||
1240 | struct channel_gk20a *c) | ||
1241 | { | ||
1242 | u32 new_get = g->ops.fifo.userd_gp_get(g, c); | ||
1243 | |||
1244 | if (new_get < c->gpfifo.get) | ||
1245 | c->gpfifo.wrap = !c->gpfifo.wrap; | ||
1246 | c->gpfifo.get = new_get; | ||
1247 | return new_get; | ||
1248 | } | ||
1249 | |||
1250 | u32 nvgpu_gp_free_count(struct channel_gk20a *c) | ||
1251 | { | ||
1252 | return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % | ||
1253 | c->gpfifo.entry_num; | ||
1254 | } | ||
1255 | |||
1256 | bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, | ||
1257 | u32 timeout_delta_ms, bool *progress) | ||
1258 | { | ||
1259 | u32 gpfifo_get = update_gp_get(ch->g, ch); | ||
1260 | |||
1261 | /* Count consecutive timeout isrs */ | ||
1262 | if (gpfifo_get == ch->timeout_gpfifo_get) { | ||
1263 | /* we didn't advance since previous channel timeout check */ | ||
1264 | ch->timeout_accumulated_ms += timeout_delta_ms; | ||
1265 | *progress = false; | ||
1266 | } else { | ||
1267 | /* first timeout isr encountered */ | ||
1268 | ch->timeout_accumulated_ms = timeout_delta_ms; | ||
1269 | *progress = true; | ||
1270 | } | ||
1271 | |||
1272 | ch->timeout_gpfifo_get = gpfifo_get; | ||
1273 | |||
1274 | return nvgpu_is_timeouts_enabled(ch->g) && | ||
1275 | ch->timeout_accumulated_ms > ch->timeout_ms_max; | ||
1276 | } | ||
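/*
 * Illustration with hypothetical numbers: if this is called from a
 * scheduling-timeout interrupt every 100 ms while gp_get never advances,
 * timeout_accumulated_ms grows 100, 200, 300, ... per call and the
 * function starts returning true once the sum exceeds timeout_ms_max.
 * Any advance of gp_get resets the accumulation to the latest delta.
 */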
1277 | |||
1278 | u32 nvgpu_get_gp_free_count(struct channel_gk20a *c) | ||
1279 | { | ||
1280 | update_gp_get(c->g, c); | ||
1281 | return nvgpu_gp_free_count(c); | ||
1282 | } | ||
1283 | |||
1284 | static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) | ||
1285 | { | ||
1286 | ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch); | ||
1287 | ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch); | ||
1288 | ch->timeout.running = true; | ||
1289 | nvgpu_timeout_init(ch->g, &ch->timeout.timer, | ||
1290 | ch->timeout.limit_ms, | ||
1291 | NVGPU_TIMER_CPU_TIMER); | ||
1292 | } | ||
1293 | |||
1294 | /** | ||
1295 | * Start a timeout counter (watchdog) on this channel. | ||
1296 | * | ||
1297 | * Trigger a watchdog to recover the channel after the per-platform timeout | ||
1298 | * duration (but strictly no earlier) if the channel hasn't advanced within | ||
1299 | * that time. | ||
1300 | * | ||
1301 | * If the timeout is already running, do nothing. This should be called when | ||
1302 | * new jobs are submitted. The timeout will stop when the last tracked job | ||
1303 | * finishes, making the channel idle. | ||
1304 | * | ||
1305 | * The channel's gpfifo read pointer will be used to determine if the job has | ||
1306 | * actually gotten stuck at that time. After the timeout duration has expired, a | ||
1307 | * worker thread will consider the channel stuck and recover it if stuck. | ||
1308 | */ | ||
1309 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch) | ||
1310 | { | ||
1311 | if (!nvgpu_is_timeouts_enabled(ch->g)) | ||
1312 | return; | ||
1313 | |||
1314 | if (!ch->timeout.enabled) | ||
1315 | return; | ||
1316 | |||
1317 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1318 | |||
1319 | if (ch->timeout.running) { | ||
1320 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1321 | return; | ||
1322 | } | ||
1323 | __gk20a_channel_timeout_start(ch); | ||
1324 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1325 | } | ||
1326 | |||
1327 | /** | ||
1328 | * Stop a running timeout counter (watchdog) on this channel. | ||
1329 | * | ||
1330 | * Make the watchdog consider the channel not running, so that it won't get | ||
1331 | * recovered even if no progress is detected. Progress is not tracked if the | ||
1332 | * watchdog is turned off. | ||
1333 | * | ||
1334 | * No guarantees are made about concurrent execution of the timeout handler. | ||
1335 | * (This should be called from an update handler running in the same thread | ||
1336 | * with the watchdog.) | ||
1337 | */ | ||
1338 | static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch) | ||
1339 | { | ||
1340 | bool was_running; | ||
1341 | |||
1342 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1343 | was_running = ch->timeout.running; | ||
1344 | ch->timeout.running = false; | ||
1345 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1346 | return was_running; | ||
1347 | } | ||
1348 | |||
1349 | /** | ||
1350 | * Continue a previously stopped timeout | ||
1351 | * | ||
1352 | * Enable the timeout again but don't reinitialize its timer. | ||
1353 | * | ||
1354 | * No guarantees are made about concurrent execution of the timeout handler. | ||
1355 | * (This should be called from an update handler running in the same thread | ||
1356 | * with the watchdog.) | ||
1357 | */ | ||
1358 | static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) | ||
1359 | { | ||
1360 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1361 | ch->timeout.running = true; | ||
1362 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1363 | } | ||
1364 | |||
1365 | /** | ||
1366 | * Rewind the timeout on each non-dormant channel. | ||
1367 | * | ||
1368 | * Reschedule the timeout of each active channel for which timeouts are running | ||
1369 | * as if something had just happened on each channel. This should be | ||
1370 | * called when a global hang is detected that could cause a false positive on | ||
1371 | * other innocent channels. | ||
1372 | */ | ||
1373 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) | ||
1374 | { | ||
1375 | struct fifo_gk20a *f = &g->fifo; | ||
1376 | u32 chid; | ||
1377 | |||
1378 | for (chid = 0; chid < f->num_channels; chid++) { | ||
1379 | struct channel_gk20a *ch = &f->channel[chid]; | ||
1380 | |||
1381 | if (!gk20a_channel_get(ch)) | ||
1382 | continue; | ||
1383 | |||
1384 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1385 | if (ch->timeout.running) | ||
1386 | __gk20a_channel_timeout_start(ch); | ||
1387 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1388 | |||
1389 | gk20a_channel_put(ch); | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | /** | ||
1394 | * Check if a timed out channel has hung and recover it if it has. | ||
1395 | * | ||
1396 | * Test if this channel has really got stuck at this point by checking if its | ||
1397 | * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since | ||
1398 | * when the watchdog was started and it's timed out, force-reset the channel. | ||
1399 | * | ||
1400 | * The gpu is implicitly on at this point, because the watchdog can only run on | ||
1401 | * channels that have submitted jobs pending for cleanup. | ||
1402 | */ | ||
1403 | static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) | ||
1404 | { | ||
1405 | struct gk20a *g = ch->g; | ||
1406 | u32 gp_get; | ||
1407 | u32 new_gp_get; | ||
1408 | u64 pb_get; | ||
1409 | u64 new_pb_get; | ||
1410 | |||
1411 | nvgpu_log_fn(g, " "); | ||
1412 | |||
1413 | /* Get status but keep timer running */ | ||
1414 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1415 | gp_get = ch->timeout.gp_get; | ||
1416 | pb_get = ch->timeout.pb_get; | ||
1417 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1418 | |||
1419 | new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch); | ||
1420 | new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); | ||
1421 | |||
1422 | if (new_gp_get != gp_get || new_pb_get != pb_get) { | ||
1423 | /* Channel has advanced, rewind timer */ | ||
1424 | gk20a_channel_timeout_stop(ch); | ||
1425 | gk20a_channel_timeout_start(ch); | ||
1426 | return; | ||
1427 | } | ||
1428 | |||
1429 | if (!nvgpu_timeout_peek_expired(&ch->timeout.timer)) { | ||
1430 | /* Seems stuck but waiting to time out */ | ||
1431 | return; | ||
1432 | } | ||
1433 | |||
1434 | nvgpu_err(g, "Job on channel %d timed out", | ||
1435 | ch->chid); | ||
1436 | |||
1437 | /* force reset calls gk20a_debug_dump but not this */ | ||
1438 | if (ch->timeout.debug_dump) | ||
1439 | gk20a_gr_debug_dump(g); | ||
1440 | |||
1441 | g->ops.fifo.force_reset_ch(ch, | ||
1442 | NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, | ||
1443 | ch->timeout.debug_dump); | ||
1444 | } | ||
1445 | |||
1446 | /** | ||
1447 | * Test if the per-channel watchdog is on; check the timeout in that case. | ||
1448 | * | ||
1449 | * Each channel has an expiration time based watchdog. The timer is | ||
1450 | * (re)initialized in two situations: when a new job is submitted on an idle | ||
1451 | * channel and when the timeout is checked but progress is detected. The | ||
1452 | * watchdog timeout limit is a coarse sliding window. | ||
1453 | * | ||
1454 | * The timeout is stopped (disabled) after the last job in a row finishes | ||
1455 | * and marks the channel idle. | ||
1456 | */ | ||
1457 | static void gk20a_channel_timeout_check(struct channel_gk20a *ch) | ||
1458 | { | ||
1459 | bool running; | ||
1460 | |||
1461 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1462 | running = ch->timeout.running; | ||
1463 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1464 | |||
1465 | if (running) | ||
1466 | gk20a_channel_timeout_handler(ch); | ||
1467 | } | ||
1468 | |||
1469 | /** | ||
1470 | * Loop over every living channel, check timeouts and handle stuck channels. | ||
1471 | */ | ||
1472 | static void gk20a_channel_poll_timeouts(struct gk20a *g) | ||
1473 | { | ||
1474 | unsigned int chid; | ||
1475 | |||
1476 | |||
1477 | for (chid = 0; chid < g->fifo.num_channels; chid++) { | ||
1478 | struct channel_gk20a *ch = &g->fifo.channel[chid]; | ||
1479 | |||
1480 | if (gk20a_channel_get(ch)) { | ||
1481 | gk20a_channel_timeout_check(ch); | ||
1482 | gk20a_channel_put(ch); | ||
1483 | } | ||
1484 | } | ||
1485 | } | ||
1486 | |||
1487 | /* | ||
1488 | * Process one scheduled work item for this channel. Currently, the only thing | ||
1489 | * the worker does is job cleanup handling. | ||
1490 | */ | ||
1491 | static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch) | ||
1492 | { | ||
1493 | nvgpu_log_fn(ch->g, " "); | ||
1494 | |||
1495 | gk20a_channel_clean_up_jobs(ch, true); | ||
1496 | |||
1497 | /* ref taken when enqueued */ | ||
1498 | gk20a_channel_put(ch); | ||
1499 | } | ||
1500 | |||
1501 | /** | ||
1502 | * Tell the worker that one more work item needs to be done. | ||
1503 | * | ||
1504 | * Increase the work counter to synchronize the worker with the new work item. | ||
1505 | * Wake up the worker. If the worker was already running, it will handle this | ||
1506 | * work item before going to sleep. | ||
1507 | */ | ||
1508 | static int __gk20a_channel_worker_wakeup(struct gk20a *g) | ||
1509 | { | ||
1510 | int put; | ||
1511 | |||
1512 | nvgpu_log_fn(g, " "); | ||
1513 | |||
1514 | /* | ||
1515 | * Currently, the only work type is associated with a lock, which deals | ||
1516 | * with any necessary barriers. If a work type with no locking were | ||
1517 | * added, a nvgpu_smp_wmb() would be needed here. See | ||
1518 | * ..worker_pending() for a pair. | ||
1519 | */ | ||
1520 | |||
1521 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); | ||
1522 | nvgpu_cond_signal_interruptible(&g->channel_worker.wq); | ||
1523 | |||
1524 | return put; | ||
1525 | } | ||
1526 | |||
1527 | /** | ||
1528 | * Test if there is some work pending. | ||
1529 | * | ||
1530 | * This is the counterpart of __gk20a_channel_worker_wakeup() and is called | ||
1531 | * from the worker. The worker has an internal work counter that is incremented | ||
1532 | * once per finished work item. It is compared with the number of queued jobs, | ||
1533 | * which may be channels on the items list or any other type of work. | ||
1534 | */ | ||
1535 | static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) | ||
1536 | { | ||
1537 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; | ||
1538 | |||
1539 | /* | ||
1540 | * This would be the place for a nvgpu_smp_rmb() pairing | ||
1541 | * a nvgpu_smp_wmb() for a wakeup if we had any work with | ||
1542 | * no implicit barriers caused by locking. | ||
1543 | */ | ||
1544 | |||
1545 | return pending; | ||
1546 | } | ||
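
/*
 * Editor's sketch of the put/get counter pair used by the two helpers
 * above, assuming a single consumer (the worker thread); this is not
 * driver code, just the pattern spelled out:
 *
 *	// producer side, any thread (see __gk20a_channel_worker_wakeup()):
 *	nvgpu_atomic_inc_return(&g->channel_worker.put);
 *	nvgpu_cond_signal_interruptible(&g->channel_worker.wq);
 *
 *	// consumer side, worker thread with a local counter "get":
 *	while (__gk20a_channel_worker_pending(g, get)) {
 *		// handle one queued item
 *		++get;	// acknowledge it
 *	}
 *
 * Work is pending exactly while the worker's local "get" trails the
 * shared "put" counter.
 */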
1547 | |||
1548 | /** | ||
1549 | * Process the queued work items for the worker thread serially. | ||
1550 | * | ||
1551 | * Flush all the work items in the queue one by one. This may block timeout | ||
1552 | * handling for a short while, as these are serialized. | ||
1553 | */ | ||
1554 | static void gk20a_channel_worker_process(struct gk20a *g, int *get) | ||
1555 | { | ||
1556 | |||
1557 | while (__gk20a_channel_worker_pending(g, *get)) { | ||
1558 | struct channel_gk20a *ch = NULL; | ||
1559 | |||
1560 | /* | ||
1561 | * If a channel is on the list, it's guaranteed to be handled | ||
1562 | * eventually just once. However, the opposite is not true: a | ||
1563 | * channel may be under processing whether or not it is on the list. | ||
1564 | * | ||
1565 | * Because of this, channel work processing must be conservative | ||
1566 | * as follows: it's always safe to look at a channel found in | ||
1567 | * the list, and if someone enqueues the channel, it will be | ||
1568 | * handled eventually, even if it's being handled at the same | ||
1569 | * time. A channel is on the list only once; multiple calls to | ||
1570 | * enqueue are harmless. | ||
1571 | */ | ||
1572 | nvgpu_spinlock_acquire(&g->channel_worker.items_lock); | ||
1573 | if (!nvgpu_list_empty(&g->channel_worker.items)) { | ||
1574 | ch = nvgpu_list_first_entry(&g->channel_worker.items, | ||
1575 | channel_gk20a, | ||
1576 | worker_item); | ||
1577 | nvgpu_list_del(&ch->worker_item); | ||
1578 | } | ||
1579 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1580 | |||
1581 | if (!ch) { | ||
1582 | /* | ||
1583 | * Woke up for some other reason, but currently the only | ||
1584 | * possible reason is a channel added to the items list, | ||
1585 | * so warn and ack the message. | ||
1586 | */ | ||
1587 | nvgpu_warn(g, "Spurious worker event!"); | ||
1588 | ++*get; | ||
1589 | break; | ||
1590 | } | ||
1591 | |||
1592 | gk20a_channel_worker_process_ch(ch); | ||
1593 | ++*get; | ||
1594 | } | ||
1595 | } | ||
1596 | |||
1597 | /* | ||
1598 | * Look at channel states periodically, until canceled. Abort timed-out | ||
1599 | * channels serially. Process all work items found in the queue. | ||
1600 | */ | ||
1601 | static int gk20a_channel_poll_worker(void *arg) | ||
1602 | { | ||
1603 | struct gk20a *g = (struct gk20a *)arg; | ||
1604 | struct gk20a_worker *worker = &g->channel_worker; | ||
1605 | unsigned long watchdog_interval = 100; /* milliseconds */ | ||
1606 | struct nvgpu_timeout timeout; | ||
1607 | int get = 0; | ||
1608 | |||
1609 | nvgpu_log_fn(g, " "); | ||
1610 | |||
1611 | nvgpu_timeout_init(g, &timeout, watchdog_interval, | ||
1612 | NVGPU_TIMER_CPU_TIMER); | ||
1613 | while (!nvgpu_thread_should_stop(&worker->poll_task)) { | ||
1614 | int ret; | ||
1615 | |||
1616 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
1617 | &worker->wq, | ||
1618 | __gk20a_channel_worker_pending(g, get), | ||
1619 | watchdog_interval); | ||
1620 | |||
1621 | if (ret == 0) | ||
1622 | gk20a_channel_worker_process(g, &get); | ||
1623 | |||
1624 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
1625 | gk20a_channel_poll_timeouts(g); | ||
1626 | nvgpu_timeout_init(g, &timeout, watchdog_interval, | ||
1627 | NVGPU_TIMER_CPU_TIMER); | ||
1628 | } | ||
1629 | } | ||
1630 | return 0; | ||
1631 | } | ||
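
/*
 * Editor's note on the loop above, restating its two wake-up paths as a
 * sketch (the 100 ms figure is the watchdog_interval default used here):
 *
 *	ret == 0         -> a wake-up arrived; drain queued work items via
 *	                    gk20a_channel_worker_process()
 *	timeout expired  -> roughly every 100 ms, poll all channels for
 *	                    watchdog expiry via gk20a_channel_poll_timeouts()
 *
 * The two are independent: a busy worker still polls the watchdogs about
 * on schedule, because the nvgpu_timeout is checked on every iteration.
 */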
1632 | |||
1633 | static int __nvgpu_channel_worker_start(struct gk20a *g) | ||
1634 | { | ||
1635 | char thread_name[64]; | ||
1636 | int err = 0; | ||
1637 | |||
1638 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) | ||
1639 | return err; | ||
1640 | |||
1641 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1642 | |||
1643 | /* | ||
1644 | * We don't want to grab a mutex on every channel update, so we check | ||
1645 | * again whether the worker has been initialized before creating a new thread. | ||
1646 | */ | ||
1647 | |||
1648 | /* | ||
1649 | * Mutexes have implicit barriers, so there is no risk of a thread | ||
1650 | * having a stale copy of the poll_task variable, as the call to | ||
1651 | * nvgpu_thread_is_running() is volatile. | ||
1652 | */ | ||
1653 | |||
1654 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) { | ||
1655 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1656 | return err; | ||
1657 | } | ||
1658 | |||
1659 | snprintf(thread_name, sizeof(thread_name), | ||
1660 | "nvgpu_channel_poll_%s", g->name); | ||
1661 | |||
1662 | err = nvgpu_thread_create(&g->channel_worker.poll_task, g, | ||
1663 | gk20a_channel_poll_worker, thread_name); | ||
1664 | |||
1665 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1666 | return err; | ||
1667 | } | ||
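
/*
 * Editor's sketch of the check-lock-recheck ("double-checked start")
 * pattern used above, in generic form; the names below are placeholders,
 * not nvgpu API:
 *
 *	if (already_started())		// lock-free fast path
 *		return 0;
 *	lock(start_lock);
 *	if (already_started()) {	// recheck: another caller won the race
 *		unlock(start_lock);
 *		return 0;
 *	}
 *	err = start_thread();		// at most one caller reaches this
 *	unlock(start_lock);
 *	return err;
 */
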
1668 | /** | ||
1669 | * Initialize the channel worker's metadata and start the background thread. | ||
1670 | */ | ||
1671 | int nvgpu_channel_worker_init(struct gk20a *g) | ||
1672 | { | ||
1673 | int err; | ||
1674 | |||
1675 | nvgpu_atomic_set(&g->channel_worker.put, 0); | ||
1676 | nvgpu_cond_init(&g->channel_worker.wq); | ||
1677 | nvgpu_init_list_node(&g->channel_worker.items); | ||
1678 | nvgpu_spinlock_init(&g->channel_worker.items_lock); | ||
1679 | err = nvgpu_mutex_init(&g->channel_worker.start_lock); | ||
1680 | if (err) | ||
1681 | goto error_check; | ||
1682 | |||
1683 | err = __nvgpu_channel_worker_start(g); | ||
1684 | error_check: | ||
1685 | if (err) { | ||
1686 | nvgpu_err(g, "failed to start channel poller thread"); | ||
1687 | return err; | ||
1688 | } | ||
1689 | return 0; | ||
1690 | } | ||
1691 | |||
1692 | void nvgpu_channel_worker_deinit(struct gk20a *g) | ||
1693 | { | ||
1694 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1695 | nvgpu_thread_stop(&g->channel_worker.poll_task); | ||
1696 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1697 | } | ||
1698 | |||
1699 | /** | ||
1700 | * Append a channel to the worker's list, if not there already. | ||
1701 | * | ||
1702 | * The worker thread processes work items (channels in its work list) and polls | ||
1703 | * for other things. This adds @ch to the end of the list and wakes the worker | ||
1704 | * up immediately. If the channel is already in the list, it is not added again, | ||
1705 | * because in that case it has already been scheduled but has not yet been | ||
1706 | * processed. | ||
1707 | */ | ||
1708 | static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch) | ||
1709 | { | ||
1710 | struct gk20a *g = ch->g; | ||
1711 | |||
1712 | nvgpu_log_fn(g, " "); | ||
1713 | |||
1714 | /* | ||
1715 | * Warn if worker thread cannot run | ||
1716 | */ | ||
1717 | if (WARN_ON(__nvgpu_channel_worker_start(g))) { | ||
1718 | nvgpu_warn(g, "channel worker cannot run!"); | ||
1719 | return; | ||
1720 | } | ||
1721 | |||
1722 | /* | ||
1723 | * Ref released when this item gets processed. The caller should hold | ||
1724 | * one ref already, so normally shouldn't fail, but the channel could | ||
1725 | * end up being freed between the time the caller got its reference and | ||
1726 | * the time we end up here (e.g., if the client got killed); if so, just | ||
1727 | * return. | ||
1728 | */ | ||
1729 | if (!gk20a_channel_get(ch)) { | ||
1730 | nvgpu_info(g, "cannot get ch ref for worker!"); | ||
1731 | return; | ||
1732 | } | ||
1733 | |||
1734 | nvgpu_spinlock_acquire(&g->channel_worker.items_lock); | ||
1735 | if (!nvgpu_list_empty(&ch->worker_item)) { | ||
1736 | /* | ||
1737 | * Already queued, so will get processed eventually. | ||
1738 | * The worker is probably awake already. | ||
1739 | */ | ||
1740 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1741 | gk20a_channel_put(ch); | ||
1742 | return; | ||
1743 | } | ||
1744 | nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items); | ||
1745 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1746 | |||
1747 | __gk20a_channel_worker_wakeup(g); | ||
1748 | } | ||
1749 | |||
1750 | int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | ||
1751 | { | ||
1752 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
1753 | struct gk20a *g = c->g; | ||
1754 | |||
1755 | if (!e) | ||
1756 | return 0; | ||
1757 | |||
1758 | if (e->valid) { | ||
1759 | /* read the entry's valid flag before reading its contents */ | ||
1760 | nvgpu_smp_rmb(); | ||
1761 | if ((q->get != e->off) && e->off != 0) | ||
1762 | nvgpu_err(g, "requests out-of-order, ch=%d", | ||
1763 | c->chid); | ||
1764 | q->get = e->off + e->size; | ||
1765 | } | ||
1766 | |||
1767 | free_priv_cmdbuf(c, e); | ||
1768 | |||
1769 | return 0; | ||
1770 | } | ||
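
/*
 * Editor's worked example of the q->get update above, assuming entries
 * are freed in allocation order (illustrative values only):
 *
 *	e1: off = 0,  size = 16  ->  free: q->get becomes 16
 *	e2: off = 16, size = 8   ->  free: q->get becomes 24
 *
 * Freeing e2 before e1 would trip the "requests out-of-order" error,
 * since q->get (0) would not match e2->off (16).
 */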
1771 | |||
1772 | int gk20a_channel_add_job(struct channel_gk20a *c, | ||
1773 | struct channel_gk20a_job *job, | ||
1774 | bool skip_buffer_refcounting) | ||
1775 | { | ||
1776 | struct vm_gk20a *vm = c->vm; | ||
1777 | struct nvgpu_mapped_buf **mapped_buffers = NULL; | ||
1778 | int err = 0, num_mapped_buffers = 0; | ||
1779 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
1780 | |||
1781 | if (!skip_buffer_refcounting) { | ||
1782 | err = nvgpu_vm_get_buffers(vm, &mapped_buffers, | ||
1783 | &num_mapped_buffers); | ||
1784 | if (err) | ||
1785 | return err; | ||
1786 | } | ||
1787 | |||
1788 | /* | ||
1789 | * Ref to hold the channel open during the job lifetime. This is | ||
1790 | * released by job cleanup launched via syncpt or sema interrupt. | ||
1791 | */ | ||
1792 | c = gk20a_channel_get(c); | ||
1793 | |||
1794 | if (c) { | ||
1795 | job->num_mapped_buffers = num_mapped_buffers; | ||
1796 | job->mapped_buffers = mapped_buffers; | ||
1797 | |||
1798 | gk20a_channel_timeout_start(c); | ||
1799 | |||
1800 | if (!pre_alloc_enabled) | ||
1801 | channel_gk20a_joblist_lock(c); | ||
1802 | |||
1803 | /* | ||
1804 | * ensure all pending write complete before adding to the list. | ||
1805 | * see corresponding nvgpu_smp_rmb in | ||
1806 | * gk20a_channel_clean_up_jobs() | ||
1807 | */ | ||
1808 | nvgpu_smp_wmb(); | ||
1809 | channel_gk20a_joblist_add(c, job); | ||
1810 | |||
1811 | if (!pre_alloc_enabled) | ||
1812 | channel_gk20a_joblist_unlock(c); | ||
1813 | } else { | ||
1814 | err = -ETIMEDOUT; | ||
1815 | goto err_put_buffers; | ||
1816 | } | ||
1817 | |||
1818 | return 0; | ||
1819 | |||
1820 | err_put_buffers: | ||
1821 | nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); | ||
1822 | |||
1823 | return err; | ||
1824 | } | ||
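
/*
 * Editor's sketch of the reference pairing set up above, assuming a
 * successful submit (the submit path itself lives elsewhere):
 *
 *	gk20a_channel_add_job(c, job, false);	// takes one channel ref
 *	// ... job executes on the GPU ...
 *	gk20a_channel_clean_up_jobs(c, true);	// drops that ref per finished job
 *
 * This is why a channel with tracked jobs in flight cannot be freed
 * before its cleanup has run.
 */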
1825 | |||
1826 | /** | ||
1827 | * Clean up job resources for further jobs to use. | ||
1828 | * @clean_all: If true, process as many jobs as possible, otherwise just one. | ||
1829 | * | ||
1830 | * Loop over all jobs in the joblist until a pending job is found, or just one if | ||
1831 | * clean_all is not set. Pending jobs are detected from the job's post fence, | ||
1832 | * so this is only done for jobs that have job tracking resources. Free all | ||
1833 | * per-job memory for completed jobs; in case of preallocated resources, this | ||
1834 | * opens up slots for new jobs to be submitted. | ||
1835 | */ | ||
1836 | void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | ||
1837 | bool clean_all) | ||
1838 | { | ||
1839 | struct vm_gk20a *vm; | ||
1840 | struct channel_gk20a_job *job; | ||
1841 | struct gk20a *g; | ||
1842 | int job_finished = 0; | ||
1843 | bool watchdog_on = false; | ||
1844 | |||
1845 | c = gk20a_channel_get(c); | ||
1846 | if (!c) | ||
1847 | return; | ||
1848 | |||
1849 | if (!c->g->power_on) { /* shutdown case */ | ||
1850 | gk20a_channel_put(c); | ||
1851 | return; | ||
1852 | } | ||
1853 | |||
1854 | vm = c->vm; | ||
1855 | g = c->g; | ||
1856 | |||
1857 | /* | ||
1858 | * If !clean_all, we're in a condition where watchdog isn't supported | ||
1859 | * anyway (this would be a no-op). | ||
1860 | */ | ||
1861 | if (clean_all) | ||
1862 | watchdog_on = gk20a_channel_timeout_stop(c); | ||
1863 | |||
1864 | /* Synchronize with abort cleanup that needs the jobs. */ | ||
1865 | nvgpu_mutex_acquire(&c->joblist.cleanup_lock); | ||
1866 | |||
1867 | while (1) { | ||
1868 | bool completed; | ||
1869 | |||
1870 | channel_gk20a_joblist_lock(c); | ||
1871 | if (channel_gk20a_joblist_is_empty(c)) { | ||
1872 | /* | ||
1873 | * No jobs in flight, timeout will remain stopped until | ||
1874 | * new jobs are submitted. | ||
1875 | */ | ||
1876 | channel_gk20a_joblist_unlock(c); | ||
1877 | break; | ||
1878 | } | ||
1879 | |||
1880 | /* | ||
1881 | * ensure that all subsequent reads occur after checking | ||
1882 | * that we have a valid node. see corresponding nvgpu_smp_wmb in | ||
1883 | * gk20a_channel_add_job(). | ||
1884 | */ | ||
1885 | nvgpu_smp_rmb(); | ||
1886 | job = channel_gk20a_joblist_peek(c); | ||
1887 | channel_gk20a_joblist_unlock(c); | ||
1888 | |||
1889 | completed = gk20a_fence_is_expired(job->post_fence); | ||
1890 | if (!completed) { | ||
1891 | /* | ||
1892 | * The watchdog eventually sees an updated gp_get if | ||
1893 | * something happened in this loop. A new job can have | ||
1894 | * been submitted between the above call to stop and | ||
1895 | * this - in that case, this is a no-op and the new | ||
1896 | * later timeout is still used. | ||
1897 | */ | ||
1898 | if (clean_all && watchdog_on) | ||
1899 | gk20a_channel_timeout_continue(c); | ||
1900 | break; | ||
1901 | } | ||
1902 | |||
1903 | WARN_ON(!c->sync); | ||
1904 | |||
1905 | if (c->sync) { | ||
1906 | if (c->has_os_fence_framework_support && | ||
1907 | g->os_channel.os_fence_framework_inst_exists(c)) | ||
1908 | g->os_channel.signal_os_fence_framework(c); | ||
1909 | |||
1910 | if (g->aggressive_sync_destroy_thresh) { | ||
1911 | nvgpu_mutex_acquire(&c->sync_lock); | ||
1912 | if (nvgpu_atomic_dec_and_test( | ||
1913 | &c->sync->refcount) && | ||
1914 | g->aggressive_sync_destroy) { | ||
1915 | gk20a_channel_sync_destroy(c->sync, | ||
1916 | false); | ||
1917 | c->sync = NULL; | ||
1918 | } | ||
1919 | nvgpu_mutex_release(&c->sync_lock); | ||
1920 | } | ||
1921 | } | ||
1922 | |||
1923 | if (job->num_mapped_buffers) | ||
1924 | nvgpu_vm_put_buffers(vm, job->mapped_buffers, | ||
1925 | job->num_mapped_buffers); | ||
1926 | |||
1927 | /* Remove job from channel's job list before we close the | ||
1928 | * fences, to prevent other callers (gk20a_channel_abort) from | ||
1929 | * trying to dereference post_fence when it no longer exists. | ||
1930 | */ | ||
1931 | channel_gk20a_joblist_lock(c); | ||
1932 | channel_gk20a_joblist_delete(c, job); | ||
1933 | channel_gk20a_joblist_unlock(c); | ||
1934 | |||
1935 | /* Close the fence (this will unref the semaphore and release | ||
1936 | * it to the pool). */ | ||
1937 | gk20a_fence_put(job->post_fence); | ||
1938 | |||
1939 | /* Free the private command buffers (wait_cmd first and | ||
1940 | * then incr_cmd i.e. order of allocation) */ | ||
1941 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
1942 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
1943 | |||
1944 | /* Drop the bookkeeping ref taken in add_job. The caller must hold a ref of | ||
1945 | * its own, so the channel won't get freed here. */ | ||
1946 | gk20a_channel_put(c); | ||
1947 | |||
1948 | /* | ||
1949 | * ensure all pending writes complete before freeing up the job. | ||
1950 | * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). | ||
1951 | */ | ||
1952 | nvgpu_smp_wmb(); | ||
1953 | |||
1954 | channel_gk20a_free_job(c, job); | ||
1955 | job_finished = 1; | ||
1956 | |||
1957 | /* | ||
1958 | * Deterministic channels have a channel-wide power reference; | ||
1959 | * for others, there's one per submit. | ||
1960 | */ | ||
1961 | if (!c->deterministic) | ||
1962 | gk20a_idle(g); | ||
1963 | |||
1964 | if (!clean_all) { | ||
1965 | /* Timeout isn't supported here so don't touch it. */ | ||
1966 | break; | ||
1967 | } | ||
1968 | } | ||
1969 | |||
1970 | nvgpu_mutex_release(&c->joblist.cleanup_lock); | ||
1971 | |||
1972 | if (job_finished && g->os_channel.work_completion_signal) | ||
1973 | g->os_channel.work_completion_signal(c); | ||
1974 | |||
1975 | gk20a_channel_put(c); | ||
1976 | } | ||
1977 | |||
1978 | /** | ||
1979 | * Schedule a job cleanup work on this channel to free resources and to signal | ||
1980 | * about completion. | ||
1981 | * | ||
1982 | * Call this when there has been an interrupt about finished jobs, or when job | ||
1983 | * cleanup needs to be performed, e.g., when closing a channel. This is always | ||
1984 | * safe to call even if there is nothing to clean up. Any visible actions on | ||
1985 | * jobs just before calling this are guaranteed to be processed. | ||
1986 | */ | ||
1987 | void gk20a_channel_update(struct channel_gk20a *c) | ||
1988 | { | ||
1989 | if (!c->g->power_on) { /* shutdown case */ | ||
1990 | return; | ||
1991 | } | ||
1992 | |||
1993 | trace_gk20a_channel_update(c->chid); | ||
1994 | /* A queued channel is always checked for job cleanup. */ | ||
1995 | gk20a_channel_worker_enqueue(c); | ||
1996 | } | ||
1997 | |||
1998 | /* | ||
1999 | * Stop deterministic channel activity for do_idle() when power needs to go off | ||
2000 | * momentarily but deterministic channels keep power refs for potentially a | ||
2001 | * long time. | ||
2002 | * | ||
2003 | * Takes write access on g->deterministic_busy. | ||
2004 | * | ||
2005 | * Must be paired with gk20a_channel_deterministic_unidle(). | ||
2006 | */ | ||
2007 | void gk20a_channel_deterministic_idle(struct gk20a *g) | ||
2008 | { | ||
2009 | struct fifo_gk20a *f = &g->fifo; | ||
2010 | u32 chid; | ||
2011 | |||
2012 | /* Grab exclusive access to the hw to block new submits */ | ||
2013 | nvgpu_rwsem_down_write(&g->deterministic_busy); | ||
2014 | |||
2015 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2016 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2017 | |||
2018 | if (!gk20a_channel_get(ch)) | ||
2019 | continue; | ||
2020 | |||
2021 | if (ch->deterministic && !ch->deterministic_railgate_allowed) { | ||
2022 | /* | ||
2023 | * Drop the power ref taken when setting deterministic | ||
2024 | * flag. deterministic_unidle will put this and the | ||
2025 | * channel ref back. If railgate is allowed separately | ||
2026 | * for this channel, the power ref has already been put | ||
2027 | * away. | ||
2028 | * | ||
2029 | * Hold the channel ref: it must not get freed in | ||
2030 | * between. A race could otherwise result in lost | ||
2031 | * gk20a_busy() via unidle, and in unbalanced | ||
2032 | * gk20a_idle() via closing the channel. | ||
2033 | */ | ||
2034 | gk20a_idle(g); | ||
2035 | } else { | ||
2036 | /* Not interesting, carry on. */ | ||
2037 | gk20a_channel_put(ch); | ||
2038 | } | ||
2039 | } | ||
2040 | } | ||
2041 | |||
2042 | /* | ||
2043 | * Allow deterministic channel activity again for do_unidle(). | ||
2044 | * | ||
2045 | * This releases write access on g->deterministic_busy. | ||
2046 | */ | ||
2047 | void gk20a_channel_deterministic_unidle(struct gk20a *g) | ||
2048 | { | ||
2049 | struct fifo_gk20a *f = &g->fifo; | ||
2050 | u32 chid; | ||
2051 | |||
2052 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2053 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2054 | |||
2055 | if (!gk20a_channel_get(ch)) | ||
2056 | continue; | ||
2057 | |||
2058 | /* | ||
2059 | * Deterministic state changes inside deterministic_busy lock, | ||
2060 | * which we took in deterministic_idle. | ||
2061 | */ | ||
2062 | if (ch->deterministic && !ch->deterministic_railgate_allowed) { | ||
2063 | if (gk20a_busy(g)) | ||
2064 | nvgpu_err(g, "cannot busy() again!"); | ||
2065 | /* Took this in idle() */ | ||
2066 | gk20a_channel_put(ch); | ||
2067 | } | ||
2068 | |||
2069 | gk20a_channel_put(ch); | ||
2070 | } | ||
2071 | |||
2072 | /* Release submits, new deterministic channels and frees */ | ||
2073 | nvgpu_rwsem_up_write(&g->deterministic_busy); | ||
2074 | } | ||
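
/*
 * Editor's sketch of how the idle/unidle pair above is meant to be used
 * by a do_idle()-style caller (hypothetical; the real caller is in
 * OS-specific code):
 *
 *	gk20a_channel_deterministic_idle(g);	// block submits, drop power refs
 *	// ... the GPU can now be railgated and brought back ...
 *	gk20a_channel_deterministic_unidle(g);	// re-take refs, allow submits
 */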
2075 | |||
2076 | int gk20a_init_channel_support(struct gk20a *g, u32 chid) | ||
2077 | { | ||
2078 | struct channel_gk20a *c = g->fifo.channel + chid; | ||
2079 | int err; | ||
2080 | |||
2081 | c->g = NULL; | ||
2082 | c->chid = chid; | ||
2083 | nvgpu_atomic_set(&c->bound, false); | ||
2084 | nvgpu_spinlock_init(&c->ref_obtain_lock); | ||
2085 | nvgpu_atomic_set(&c->ref_count, 0); | ||
2086 | c->referenceable = false; | ||
2087 | nvgpu_cond_init(&c->ref_count_dec_wq); | ||
2088 | |||
2089 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
2090 | nvgpu_spinlock_init(&c->ref_actions_lock); | ||
2091 | #endif | ||
2092 | nvgpu_spinlock_init(&c->joblist.dynamic.lock); | ||
2093 | nvgpu_raw_spinlock_init(&c->timeout.lock); | ||
2094 | |||
2095 | nvgpu_init_list_node(&c->joblist.dynamic.jobs); | ||
2096 | nvgpu_init_list_node(&c->dbg_s_list); | ||
2097 | nvgpu_init_list_node(&c->worker_item); | ||
2098 | |||
2099 | err = nvgpu_mutex_init(&c->ioctl_lock); | ||
2100 | if (err) | ||
2101 | return err; | ||
2102 | err = nvgpu_mutex_init(&c->joblist.cleanup_lock); | ||
2103 | if (err) | ||
2104 | goto fail_1; | ||
2105 | err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); | ||
2106 | if (err) | ||
2107 | goto fail_2; | ||
2108 | err = nvgpu_mutex_init(&c->sync_lock); | ||
2109 | if (err) | ||
2110 | goto fail_3; | ||
2111 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
2112 | err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | ||
2113 | if (err) | ||
2114 | goto fail_4; | ||
2115 | err = nvgpu_mutex_init(&c->cs_client_mutex); | ||
2116 | if (err) | ||
2117 | goto fail_5; | ||
2118 | #endif | ||
2119 | err = nvgpu_mutex_init(&c->dbg_s_lock); | ||
2120 | if (err) | ||
2121 | goto fail_6; | ||
2122 | |||
2123 | nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); | ||
2124 | |||
2125 | return 0; | ||
2126 | |||
2127 | fail_6: | ||
2128 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
2129 | nvgpu_mutex_destroy(&c->cs_client_mutex); | ||
2130 | fail_5: | ||
2131 | nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); | ||
2132 | fail_4: | ||
2133 | #endif | ||
2134 | nvgpu_mutex_destroy(&c->sync_lock); | ||
2135 | fail_3: | ||
2136 | nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); | ||
2137 | fail_2: | ||
2138 | nvgpu_mutex_destroy(&c->joblist.cleanup_lock); | ||
2139 | fail_1: | ||
2140 | nvgpu_mutex_destroy(&c->ioctl_lock); | ||
2141 | |||
2142 | return err; | ||
2143 | } | ||
2144 | |||
2145 | /* in this context the "channel" is the host1x channel which | ||
2146 | * maps to *all* gk20a channels */ | ||
2147 | int gk20a_channel_suspend(struct gk20a *g) | ||
2148 | { | ||
2149 | struct fifo_gk20a *f = &g->fifo; | ||
2150 | u32 chid; | ||
2151 | bool channels_in_use = false; | ||
2152 | u32 active_runlist_ids = 0; | ||
2153 | |||
2154 | nvgpu_log_fn(g, " "); | ||
2155 | |||
2156 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2157 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2158 | if (gk20a_channel_get(ch)) { | ||
2159 | nvgpu_log_info(g, "suspend channel %d", chid); | ||
2160 | /* disable channel */ | ||
2161 | gk20a_disable_channel_tsg(g, ch); | ||
2162 | /* preempt the channel */ | ||
2163 | gk20a_fifo_preempt(g, ch); | ||
2164 | /* wait for channel update notifiers */ | ||
2165 | if (g->os_channel.work_completion_cancel_sync) | ||
2166 | g->os_channel.work_completion_cancel_sync(ch); | ||
2167 | |||
2168 | channels_in_use = true; | ||
2169 | |||
2170 | active_runlist_ids |= BIT(ch->runlist_id); | ||
2171 | |||
2172 | gk20a_channel_put(ch); | ||
2173 | } | ||
2174 | } | ||
2175 | |||
2176 | if (channels_in_use) { | ||
2177 | gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true); | ||
2178 | |||
2179 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2180 | if (gk20a_channel_get(&f->channel[chid])) { | ||
2181 | g->ops.fifo.unbind_channel(&f->channel[chid]); | ||
2182 | gk20a_channel_put(&f->channel[chid]); | ||
2183 | } | ||
2184 | } | ||
2185 | } | ||
2186 | |||
2187 | nvgpu_log_fn(g, "done"); | ||
2188 | return 0; | ||
2189 | } | ||
2190 | |||
2191 | int gk20a_channel_resume(struct gk20a *g) | ||
2192 | { | ||
2193 | struct fifo_gk20a *f = &g->fifo; | ||
2194 | u32 chid; | ||
2195 | bool channels_in_use = false; | ||
2196 | u32 active_runlist_ids = 0; | ||
2197 | |||
2198 | nvgpu_log_fn(g, " "); | ||
2199 | |||
2200 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2201 | if (gk20a_channel_get(&f->channel[chid])) { | ||
2202 | nvgpu_log_info(g, "resume channel %d", chid); | ||
2203 | g->ops.fifo.bind_channel(&f->channel[chid]); | ||
2204 | channels_in_use = true; | ||
2205 | active_runlist_ids |= BIT(f->channel[chid].runlist_id); | ||
2206 | gk20a_channel_put(&f->channel[chid]); | ||
2207 | } | ||
2208 | } | ||
2209 | |||
2210 | if (channels_in_use) | ||
2211 | gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true); | ||
2212 | |||
2213 | nvgpu_log_fn(g, "done"); | ||
2214 | return 0; | ||
2215 | } | ||
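
/*
 * Editor's note: suspend and resume above are symmetric. A typical
 * power-management sequence (hypothetical caller) would be:
 *
 *	gk20a_channel_suspend(g);	// disable, preempt and unbind channels
 *	// ... power down / power up ...
 *	gk20a_channel_resume(g);	// rebind channels, restore runlists
 */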
2216 | |||
2217 | void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) | ||
2218 | { | ||
2219 | struct fifo_gk20a *f = &g->fifo; | ||
2220 | u32 chid; | ||
2221 | |||
2222 | nvgpu_log_fn(g, " "); | ||
2223 | |||
2224 | /* | ||
2225 | * Ensure that all pending writes are actually done before trying to | ||
2226 | * read semaphore values from DRAM. | ||
2227 | */ | ||
2228 | g->ops.mm.fb_flush(g); | ||
2229 | |||
2230 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2231 | struct channel_gk20a *c = g->fifo.channel + chid; | ||
2232 | if (gk20a_channel_get(c)) { | ||
2233 | if (nvgpu_atomic_read(&c->bound)) { | ||
2234 | nvgpu_cond_broadcast_interruptible( | ||
2235 | &c->semaphore_wq); | ||
2236 | if (post_events) { | ||
2237 | if (gk20a_is_channel_marked_as_tsg(c)) { | ||
2238 | struct tsg_gk20a *tsg = | ||
2239 | &g->fifo.tsg[c->tsgid]; | ||
2240 | |||
2241 | g->ops.fifo.post_event_id(tsg, | ||
2242 | NVGPU_EVENT_ID_BLOCKING_SYNC); | ||
2243 | } | ||
2244 | } | ||
2245 | /* | ||
2246 | * Only non-deterministic channels get the | ||
2247 | * channel_update callback. We don't allow | ||
2248 | * semaphore-backed syncs for these channels | ||
2249 | * anyways, since they have a dependency on | ||
2250 | * the sync framework. | ||
2251 | * If deterministic channels are receiving a | ||
2252 | * semaphore wakeup, it must be for a | ||
2253 | * user-space managed semaphore, since the | ||
2254 | * sync framework is not used with them. | ||
2255 | */ | ||
2256 | if (!c->deterministic) | ||
2257 | gk20a_channel_update(c); | ||
2258 | } | ||
2259 | gk20a_channel_put(c); | ||
2260 | } | ||
2261 | } | ||
2262 | } | ||