Diffstat (limited to 'include/nvgpu/channel.h')

-rw-r--r--  include/nvgpu/channel.h | 478
1 file changed, 0 insertions, 478 deletions
diff --git a/include/nvgpu/channel.h b/include/nvgpu/channel.h
deleted file mode 100644
index 764d047..0000000
--- a/include/nvgpu/channel.h
+++ /dev/null
@@ -1,478 +0,0 @@
/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_CHANNEL_H
#define NVGPU_CHANNEL_H

#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/timers.h>
#include <nvgpu/cond.h>
#include <nvgpu/atomic.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>

struct gk20a;
struct dbg_session_gk20a;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct nvgpu_channel_sync;
struct nvgpu_gpfifo_userdata;

/* Flags to be passed to nvgpu_channel_setup_bind() */
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR (1U << 0U)
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC (1U << 1U)
#define NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE (1U << 2U)
#define NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT (1U << 3U)
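/*
 * Illustrative only (not part of the original header): a channel that wants
 * usermode submission together with replayable faults would OR the flags
 * into the setup/bind arguments before calling nvgpu_channel_setup_bind(),
 * e.g.:
 *
 *   args.flags = NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT |
 *                NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE;
 */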

/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1U << 0U)
#define NVGPU_SUBMIT_FLAGS_FENCE_GET (1U << 1U)
#define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1U << 2U)
#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1U << 3U)
#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1U << 4U)
#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1U << 5U)
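/*
 * Illustrative only: a submit that waits on a caller-supplied pre-fence and
 * asks for a post-fence back would pass
 *
 *   u32 flags = NVGPU_SUBMIT_FLAGS_FENCE_WAIT | NVGPU_SUBMIT_FLAGS_FENCE_GET;
 *
 * to nvgpu_submit_channel_gpfifo_user()/_kernel(), declared later in this
 * header.
 */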

/*
 * The binary format of 'struct nvgpu_channel_fence' introduced here
 * should match that of 'struct nvgpu_fence' defined in the uapi header,
 * since this struct is intended to be a mirror copy of the uapi struct.
 * This is not a hard requirement, though, because of the
 * nvgpu_get_fence_args conversion function.
 */
struct nvgpu_channel_fence {
        u32 id;
        u32 value;
};

/*
 * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
 * should match that of 'struct nvgpu_gpfifo' defined in the uapi header,
 * since this struct is intended to be a mirror copy of the uapi struct.
 * This is a rigid requirement because there is no conversion function and
 * there are memcpy() calls between the user gpfifo (of type nvgpu_gpfifo)
 * and the kernel gpfifo (of type nvgpu_gpfifo_entry).
 */
struct nvgpu_gpfifo_entry {
        u32 entry0;
        u32 entry1;
};
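/*
 * Illustrative only: since the layout must match the uapi struct bit for
 * bit, the requirement could be pinned down at build time with something
 * like the following (assuming the uapi header is visible at this point):
 *
 *   _Static_assert(sizeof(struct nvgpu_gpfifo_entry) ==
 *                  sizeof(struct nvgpu_gpfifo),
 *                  "gpfifo entry must mirror the uapi layout");
 */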

struct gpfifo_desc {
        struct nvgpu_mem mem;
        u32 entry_num;

        u32 get;
        u32 put;

        bool wrap;

        /*
         * If the gpfifo lives in vidmem or is forced to go via PRAMIN,
         * first copy from userspace to this pipe buffer, then from the
         * pipe to the GPU buffer.
         */
        void *pipe;
};
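/*
 * A sketch of the get/put ring accounting above (illustrative only; the
 * real helpers are nvgpu_gp_free_count() and friends declared near the end
 * of this header, and the exact formula is an implementation detail of the
 * .c file): with entry_num slots, one slot is commonly kept empty so that
 * get == put unambiguously means "empty", e.g.
 *
 *   free = (entry_num - (put - get) - 1U) % entry_num;
 */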

struct nvgpu_setup_bind_args {
        u32 num_gpfifo_entries;
        u32 num_inflight_jobs;
        u32 userd_dmabuf_fd;
        u64 userd_dmabuf_offset;
        u32 gpfifo_dmabuf_fd;
        u64 gpfifo_dmabuf_offset;
        u32 work_submit_token;
        u32 flags;
};

struct notification {
        struct {
                u32 nanoseconds[2];
        } timestamp;
        u32 info32;
        u16 info16;
        u16 status;
};

struct priv_cmd_queue {
        struct nvgpu_mem mem;
        u32 size; /* queue size, in u32 words */
        u32 put; /* put for priv cmd queue */
        u32 get; /* get for priv cmd queue */
};

struct priv_cmd_entry {
        bool valid;
        struct nvgpu_mem *mem;
        u32 off; /* offset in mem, in u32 entries */
        u64 gva;
        u32 get; /* start of entry in queue */
        u32 size; /* in words */
};

struct channel_gk20a_job {
        struct nvgpu_mapped_buf **mapped_buffers;
        int num_mapped_buffers;
        struct gk20a_fence *post_fence;
        struct priv_cmd_entry *wait_cmd;
        struct priv_cmd_entry *incr_cmd;
        struct nvgpu_list_node list;
};

static inline struct channel_gk20a_job *
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
{
        return (struct channel_gk20a_job *)
                ((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
}
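/*
 * Illustrative only: the helper above is the usual container_of idiom;
 * given a pointer to the embedded list node, it recovers the enclosing
 * job. A typical traversal of the dynamic joblist defined below would do
 *
 *   struct channel_gk20a_job *job = channel_gk20a_job_from_list(node);
 *
 * for each node walked from channel_gk20a_joblist.dynamic.jobs.
 */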

struct channel_gk20a_joblist {
        struct {
                bool enabled;
                unsigned int length;
                unsigned int put;
                unsigned int get;
                struct channel_gk20a_job *jobs;
                struct nvgpu_mutex read_lock;
        } pre_alloc;

        struct {
                struct nvgpu_list_node jobs;
                struct nvgpu_spinlock lock;
        } dynamic;

        /*
         * Synchronize abort cleanup (when closing a channel) and job cleanup
         * (asynchronously from the worker) - protect from concurrent access
         * when job resources are being freed.
         */
        struct nvgpu_mutex cleanup_lock;
};

struct channel_gk20a_timeout {
        /* lock protects the running timer state */
        struct nvgpu_spinlock lock;
        struct nvgpu_timeout timer;
        bool running;
        u32 gp_get;
        u64 pb_get;

        /* lock not needed */
        u32 limit_ms;
        bool enabled;
        bool debug_dump;
};

/*
 * Track refcount actions, saving their stack traces. This number specifies
 * how many of the most recent actions are stored in a buffer. Set to 0 to
 * disable. 128 should be enough to track moderately hard problems from the
 * start.
 */
#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
/* Stack depth for the saved actions. */
#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8

/*
 * Because the puts and gets are not linked together explicitly (although
 * they should always come in pairs), it's not possible to tell which ref
 * holder to delete from the list when doing a put. So, just store some
 * number of the most recent gets and puts in a ring buffer, to obtain a
 * history.
 *
 * These are zeroed when a channel is closed, so a new one starts fresh.
 */

enum channel_gk20a_ref_action_type {
        channel_gk20a_ref_action_get,
        channel_gk20a_ref_action_put
};
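/*
 * A sketch of the ring buffer write described above (illustrative only,
 * not part of this header; the actual recording code lives outside it):
 * each get/put would record an entry and advance the write index modulo
 * the buffer size, e.g.
 *
 *   ch->ref_actions[ch->ref_actions_put].type = channel_gk20a_ref_action_get;
 *   ch->ref_actions_put = (ch->ref_actions_put + 1) %
 *                         GK20A_CHANNEL_REFCOUNT_TRACKING;
 */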

#if GK20A_CHANNEL_REFCOUNT_TRACKING

#include <linux/stacktrace.h>

struct channel_gk20a_ref_action {
        enum channel_gk20a_ref_action_type type;
        s64 timestamp_ms;
        /*
         * Many of these traces will be similar. Simpler to just capture
         * duplicates than to have a separate database for the entries.
         */
        struct stack_trace trace;
        unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
};
#endif

/* this is the priv element of struct nvhost_channel */
struct channel_gk20a {
        struct gk20a *g; /* set only when channel is active */

        struct nvgpu_list_node free_chs;

        struct nvgpu_spinlock ref_obtain_lock;
        nvgpu_atomic_t ref_count;
        struct nvgpu_cond ref_count_dec_wq;
#if GK20A_CHANNEL_REFCOUNT_TRACKING
        /*
         * Ring buffer for most recent refcount gets and puts. Protected by
         * ref_actions_lock when getting or putting refs (i.e., adding
         * entries), and when reading entries.
         */
        struct channel_gk20a_ref_action ref_actions[
                GK20A_CHANNEL_REFCOUNT_TRACKING];
        size_t ref_actions_put; /* index of next write */
        struct nvgpu_spinlock ref_actions_lock;
#endif

        struct nvgpu_semaphore_int *hw_sema;

        nvgpu_atomic_t bound;

        u32 chid;
        u32 tsgid;
        pid_t pid;
        pid_t tgid;
        struct nvgpu_mutex ioctl_lock;

        struct nvgpu_list_node ch_entry; /* channel's entry in TSG */

        struct channel_gk20a_joblist joblist;
        struct nvgpu_allocator fence_allocator;

        struct vm_gk20a *vm;

        struct gpfifo_desc gpfifo;

        struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
        struct nvgpu_mem usermode_gpfifo;
        struct nvgpu_mem inst_block;

        u64 userd_iova;
        u64 userd_gpu_va;

        struct priv_cmd_queue priv_cmd_q;

        struct nvgpu_cond notifier_wq;
        struct nvgpu_cond semaphore_wq;

        /* kernel watchdog to kill stuck jobs */
        struct channel_gk20a_timeout timeout;

        /* for job cleanup handling in the background worker */
        struct nvgpu_list_node worker_item;

#if defined(CONFIG_GK20A_CYCLE_STATS)
        struct {
                void *cyclestate_buffer;
                u32 cyclestate_buffer_size;
                struct nvgpu_mutex cyclestate_buffer_mutex;
        } cyclestate;

        struct nvgpu_mutex cs_client_mutex;
        struct gk20a_cs_snapshot_client *cs_client;
#endif
        struct nvgpu_mutex dbg_s_lock;
        struct nvgpu_list_node dbg_s_list;

        struct nvgpu_mutex sync_lock;
        struct nvgpu_channel_sync *sync;
        struct nvgpu_channel_sync *user_sync;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
        u64 virt_ctx;
#endif

        struct nvgpu_mem ctx_header;

        struct nvgpu_spinlock ch_timedout_lock;
        bool ch_timedout;
        /* Any operating system specific data. */
        void *os_priv;

        u32 obj_class; /* we support only one obj per channel */

        u32 timeout_accumulated_ms;
        u32 timeout_gpfifo_get;

        u32 subctx_id;
        u32 runqueue_sel;

        u32 timeout_ms_max;
        u32 runlist_id;

        bool mmu_nack_handled;
        bool referenceable;
        bool vpr;
        bool deterministic;
        /* deterministic, but explicitly idle and submits disallowed */
        bool deterministic_railgate_allowed;
        bool cde;
        bool usermode_submit_enabled;
        bool timeout_debug_dump;
        bool has_os_fence_framework_support;

        bool is_privileged_channel;

        /**
         * MMU Debugger Mode is enabled for this channel if refcnt > 0
         */
        u32 mmu_debug_mode_refcnt;
};

static inline struct channel_gk20a *
channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
{
        return (struct channel_gk20a *)
                ((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
}

static inline struct channel_gk20a *
channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
{
        return (struct channel_gk20a *)
                ((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
}

static inline struct channel_gk20a *
channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
{
        return (struct channel_gk20a *)
                ((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
}

static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
{
        return !!ch->vm;
}

int channel_gk20a_commit_va(struct channel_gk20a *c);
int gk20a_init_channel_support(struct gk20a *g, u32 chid);

/* must be inside gk20a_busy()..gk20a_idle() */
void gk20a_channel_close(struct channel_gk20a *ch);
void __gk20a_channel_kill(struct channel_gk20a *ch);

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
                u32 timeout_delta_ms, bool *progress);
void gk20a_disable_channel(struct channel_gk20a *ch);
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
                struct priv_cmd_entry *entry);
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);

int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);

int gk20a_channel_suspend(struct gk20a *g);
int gk20a_channel_resume(struct gk20a *g);

void gk20a_channel_deterministic_idle(struct gk20a *g);
void gk20a_channel_deterministic_unidle(struct gk20a *g);

int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);

struct channel_gk20a *gk20a_get_channel_from_file(int fd);
void gk20a_channel_update(struct channel_gk20a *c);

/* returns ch if reference was obtained */
struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
                const char *caller);
#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)

void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)

/* returns NULL if a reference could not be taken on the channel */
struct channel_gk20a *__must_check _gk20a_channel_from_id(struct gk20a *g,
                u32 chid, const char *caller);
#define gk20a_channel_from_id(g, chid) _gk20a_channel_from_id(g, chid, __func__)
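/*
 * Illustrative only: the reference helpers above are meant to be used in
 * matched pairs; a typical lookup-and-use pattern would be
 *
 *   struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 *
 *   if (ch != NULL) {
 *           ... operate on the channel ...
 *           gk20a_channel_put(ch);
 *   }
 */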

int gk20a_wait_channel_idle(struct channel_gk20a *ch);

/* runlist_id -1 is a synonym for the ENGINE_GR_GK20A runlist id */
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
                s32 runlist_id,
                bool is_privileged_channel,
                pid_t pid, pid_t tid);

int nvgpu_channel_setup_bind(struct channel_gk20a *c,
                struct nvgpu_setup_bind_args *args);

void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);

bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);
void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);

int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
                unsigned int timeslice_period,
                unsigned int *__timeslice_timeout,
                unsigned int *__timeslice_scale);

void gk20a_wait_until_counter_is_N(
                struct channel_gk20a *ch, nvgpu_atomic_t *counter,
                int wait_value, struct nvgpu_cond *c, const char *caller,
                const char *counter_name);
int channel_gk20a_alloc_job(struct channel_gk20a *c,
                struct channel_gk20a_job **job_out);
void channel_gk20a_free_job(struct channel_gk20a *c,
                struct channel_gk20a_job *job);
u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
u32 nvgpu_gp_free_count(struct channel_gk20a *c);
int gk20a_channel_add_job(struct channel_gk20a *c,
                struct channel_gk20a_job *job,
                bool skip_buffer_refcounting);
void free_priv_cmdbuf(struct channel_gk20a *c,
                struct priv_cmd_entry *e);
void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                bool clean_all);

void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
u32 nvgpu_get_gpfifo_entry_size(void);

int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
                struct nvgpu_gpfifo_userdata userdata,
                u32 num_entries,
                u32 flags,
                struct nvgpu_channel_fence *fence,
                struct gk20a_fence **fence_out,
                struct fifo_profile_gk20a *profile);

int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
                struct nvgpu_gpfifo_entry *gpfifo,
                u32 num_entries,
                u32 flags,
                struct nvgpu_channel_fence *fence,
                struct gk20a_fence **fence_out);
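/*
 * Illustrative only: a minimal in-kernel submit of a caller-built gpfifo
 * segment that requests a post-fence might look like the following (error
 * handling elided; gpfifo and num_entries are assumed valid):
 *
 *   struct nvgpu_channel_fence fence = { 0 };
 *   struct gk20a_fence *fence_out = NULL;
 *   int err;
 *
 *   err = nvgpu_submit_channel_gpfifo_kernel(c, gpfifo, num_entries,
 *                   NVGPU_SUBMIT_FLAGS_FENCE_GET, &fence, &fence_out);
 */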

#ifdef CONFIG_DEBUG_FS
void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
#else
static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
{
}
#endif

void gk20a_channel_set_timedout(struct channel_gk20a *ch);
bool gk20a_channel_check_timedout(struct channel_gk20a *ch);

#endif /* NVGPU_CHANNEL_H */