Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/channel.h	417
1 file changed, 411 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 7434f0e7..6cca843e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -23,13 +23,418 @@
 #ifndef NVGPU_CHANNEL_H
 #define NVGPU_CHANNEL_H
 
-#include <nvgpu/types.h>
+#include <nvgpu/list.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/cond.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/allocator.h>
 
-#include "gk20a/gk20a.h"
-
-struct nvgpu_channel_fence;
+struct gk20a;
+struct dbg_session_gk20a;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
+struct gk20a_channel_sync;
+struct nvgpu_gpfifo_userdata;
+
+/* Flags to be passed to gk20a_channel_alloc_gpfifo() */
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR (1U << 0U)
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1U << 1U)
+#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1U << 2U)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1U << 3U)
+
+/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
+#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1U << 0U)
+#define NVGPU_SUBMIT_FLAGS_FENCE_GET (1U << 1U)
+#define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1U << 2U)
+#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1U << 3U)
+#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1U << 4U)
+#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1U << 5U)
+
+/*
+ * The binary format of 'struct nvgpu_channel_fence' introduced here
+ * should match that of 'struct nvgpu_fence' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * not a hard requirement though because of nvgpu_get_fence_args conversion
+ * function.
+ */
+struct nvgpu_channel_fence {
+	u32 id;
+	u32 value;
+};
+
+/*
+ * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
+ * should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * a rigid requirement because there's no conversion function and there are
+ * memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
+ * kern gpfifo (of type nvgpu_gpfifo_entry).
+ */
+struct nvgpu_gpfifo_entry {
+	u32 entry0;
+	u32 entry1;
+};
+
+struct gpfifo_desc {
+	struct nvgpu_mem mem;
+	u32 entry_num;
+
+	u32 get;
+	u32 put;
+
+	bool wrap;
+
+	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
+	 * from userspace to pipe and then from pipe to gpu buffer */
+	void *pipe;
+};
+
+struct nvgpu_gpfifo_args {
+	u32 num_entries;
+	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
+	u32 flags;
+};
+
+struct notification {
+	struct {
+		u32 nanoseconds[2];
+	} timestamp;
+	u32 info32;
+	u16 info16;
+	u16 status;
+};
+
+struct priv_cmd_queue {
+	struct nvgpu_mem mem;
+	u32 size;	/* num of entries in words */
+	u32 put;	/* put for priv cmd queue */
+	u32 get;	/* get for priv cmd queue */
+};
+
+struct priv_cmd_entry {
+	bool valid;
+	struct nvgpu_mem *mem;
+	u32 off;	/* offset in mem, in u32 entries */
+	u64 gva;
+	u32 get;	/* start of entry in queue */
+	u32 size;	/* in words */
+};
+
+struct channel_gk20a_job {
+	struct nvgpu_mapped_buf **mapped_buffers;
+	int num_mapped_buffers;
+	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
+	struct nvgpu_list_node list;
+};
+
+static inline struct channel_gk20a_job *
+channel_gk20a_job_from_list(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a_job *)
+		((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
+};
+
+struct channel_gk20a_joblist {
+	struct {
+		bool enabled;
+		unsigned int length;
+		unsigned int put;
+		unsigned int get;
+		struct channel_gk20a_job *jobs;
+		struct nvgpu_mutex read_lock;
+	} pre_alloc;
+
+	struct {
+		struct nvgpu_list_node jobs;
+		struct nvgpu_spinlock lock;
+	} dynamic;
+
+	/*
+	 * Synchronize abort cleanup (when closing a channel) and job cleanup
+	 * (asynchronously from worker) - protect from concurrent access when
+	 * job resources are being freed.
+	 */
+	struct nvgpu_mutex cleanup_lock;
+};
+
+struct channel_gk20a_timeout {
+	/* lock protects the running timer state */
+	struct nvgpu_raw_spinlock lock;
+	struct nvgpu_timeout timer;
+	bool running;
+	u32 gp_get;
+	u64 pb_get;
+
+	/* lock not needed */
+	u32 limit_ms;
+	bool enabled;
+	bool debug_dump;
+};
+
+/*
+ * Track refcount actions, saving their stack traces. This number specifies how
+ * many most recent actions are stored in a buffer. Set to 0 to disable. 128
+ * should be enough to track moderately hard problems from the start.
+ */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
+/* Stack depth for the saved actions. */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8
+
+/*
+ * Because the puts and gets are not linked together explicitly (although they
+ * should always come in pairs), it's not possible to tell which ref holder to
+ * delete from the list when doing a put. So, just store some number of most
+ * recent gets and puts in a ring buffer, to obtain a history.
+ *
+ * These are zeroed when a channel is closed, so a new one starts fresh.
+ */
+
+enum channel_gk20a_ref_action_type {
+	channel_gk20a_ref_action_get,
+	channel_gk20a_ref_action_put
+};
+
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+
+#include <linux/stacktrace.h>
+
+struct channel_gk20a_ref_action {
+	enum channel_gk20a_ref_action_type type;
+	s64 timestamp_ms;
+	/*
+	 * Many of these traces will be similar. Simpler to just capture
+	 * duplicates than to have a separate database for the entries.
+	 */
+	struct stack_trace trace;
+	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
+};
+#endif
+
+/* this is the priv element of struct nvhost_channel */
+struct channel_gk20a {
+	struct gk20a *g; /* set only when channel is active */
+
+	struct nvgpu_list_node free_chs;
+
+	struct nvgpu_spinlock ref_obtain_lock;
+	nvgpu_atomic_t ref_count;
+	struct nvgpu_cond ref_count_dec_wq;
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+	/*
+	 * Ring buffer for most recent refcount gets and puts. Protected by
+	 * ref_actions_lock when getting or putting refs (i.e., adding
+	 * entries), and when reading entries.
+	 */
+	struct channel_gk20a_ref_action ref_actions[
+		GK20A_CHANNEL_REFCOUNT_TRACKING];
+	size_t ref_actions_put; /* index of next write */
+	struct nvgpu_spinlock ref_actions_lock;
+#endif
+
+	struct nvgpu_semaphore_int *hw_sema;
+
+	nvgpu_atomic_t bound;
+
+	int chid;
+	int tsgid;
+	pid_t pid;
+	pid_t tgid;
+	struct nvgpu_mutex ioctl_lock;
+
+	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
+
+	struct channel_gk20a_joblist joblist;
+	struct nvgpu_allocator fence_allocator;
+
+	struct vm_gk20a *vm;
+
+	struct gpfifo_desc gpfifo;
+
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
+	struct nvgpu_mem usermode_gpfifo;
+	struct nvgpu_mem inst_block;
+
+	u64 userd_iova;
+	u64 userd_gpu_va;
+
+	struct priv_cmd_queue priv_cmd_q;
+
+	struct nvgpu_cond notifier_wq;
+	struct nvgpu_cond semaphore_wq;
+
+	/* kernel watchdog to kill stuck jobs */
+	struct channel_gk20a_timeout timeout;
+
+	/* for job cleanup handling in the background worker */
+	struct nvgpu_list_node worker_item;
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	struct {
+		void *cyclestate_buffer;
+		u32 cyclestate_buffer_size;
+		struct nvgpu_mutex cyclestate_buffer_mutex;
+	} cyclestate;
+
+	struct nvgpu_mutex cs_client_mutex;
+	struct gk20a_cs_snapshot_client *cs_client;
+#endif
+	struct nvgpu_mutex dbg_s_lock;
+	struct nvgpu_list_node dbg_s_list;
+
+	struct nvgpu_mutex sync_lock;
+	struct gk20a_channel_sync *sync;
+	struct gk20a_channel_sync *user_sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
+
+	struct nvgpu_mem ctx_header;
+
+	/* Any operating system specific data. */
+	void *os_priv;
+
+	u32 obj_class;	/* we support only one obj per channel */
+
+	u32 timeout_accumulated_ms;
+	u32 timeout_gpfifo_get;
+
+	u32 subctx_id;
+	u32 runqueue_sel;
+
+	u32 timeout_ms_max;
+	u32 runlist_id;
+
+	bool mmu_nack_handled;
+	bool has_timedout;
+	bool referenceable;
+	bool vpr;
+	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
+	bool cde;
+	bool usermode_submit_enabled;
+	bool timeout_debug_dump;
+	bool has_os_fence_framework_support;
+
+	bool is_privileged_channel;
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
+};
+
+static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
+{
+	return !!ch->vm;
+}
+int channel_gk20a_commit_va(struct channel_gk20a *c);
+int gk20a_init_channel_support(struct gk20a *, u32 chid);
+
+/* must be inside gk20a_busy()..gk20a_idle() */
+void gk20a_channel_close(struct channel_gk20a *ch);
+void __gk20a_channel_kill(struct channel_gk20a *ch);
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+		u32 timeout_delta_ms, bool *progress);
+void gk20a_disable_channel(struct channel_gk20a *ch);
+void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
+void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
+void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
+		struct priv_cmd_entry *entry);
+int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
+
+int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+
+int gk20a_channel_suspend(struct gk20a *g);
+int gk20a_channel_resume(struct gk20a *g);
+
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
+struct channel_gk20a *gk20a_get_channel_from_file(int fd);
+void gk20a_channel_update(struct channel_gk20a *c);
+
+/* returns ch if reference was obtained */
+struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
+		const char *caller);
+#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
+
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
+#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
+
+int gk20a_wait_channel_idle(struct channel_gk20a *ch);
+
+/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
+		s32 runlist_id,
+		bool is_privileged_channel,
+		pid_t pid, pid_t tid);
+
+int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_args *gpfifo_args);
+
+void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
+
+int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
+int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
+		unsigned int timeslice_period,
+		unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale);
+
+void gk20a_wait_until_counter_is_N(
+	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
+	struct nvgpu_cond *c, const char *caller, const char *counter_name);
+int channel_gk20a_alloc_job(struct channel_gk20a *c,
+		struct channel_gk20a_job **job_out);
+void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
+u32 nvgpu_gp_free_count(struct channel_gk20a *c);
+int gk20a_channel_add_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job,
+		bool skip_buffer_refcounting);
+void free_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *e);
+void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+		bool clean_all);
+
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
+u32 nvgpu_get_gpfifo_entry_size(void);
 
 int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				struct nvgpu_gpfifo_userdata userdata,
@@ -47,9 +452,9 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 		struct gk20a_fence **fence_out);
 
 #ifdef CONFIG_DEBUG_FS
-void trace_write_pushbuffers(struct channel_gk20a *c, int count);
+void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
 #else
-static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count)
+static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
 }
 #endif
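
The header comments above describe the channel reference counting: _gk20a_channel_get() "returns ch if reference was obtained", and gets and puts are expected to come in pairs. A minimal caller-side sketch of that pairing, assuming a hypothetical helper (example_poke_channel and the -ENODEV return value are illustrative, not part of this change):

	/*
	 * Sketch only: take a reference before touching a channel that may be
	 * torn down concurrently, and drop it when done. Per the comment in
	 * channel.h, gk20a_channel_get() returns the channel only when a
	 * reference was obtained; assume it returns NULL otherwise.
	 */
	static int example_poke_channel(struct channel_gk20a *ch)
	{
		struct channel_gk20a *ref = gk20a_channel_get(ch);

		if (ref == NULL) {
			/* channel not referenceable (being closed); bail out */
			return -ENODEV;
		}

		/* ... operate on the referenced channel here ... */

		gk20a_channel_put(ref);	/* every successful get needs a put */
		return 0;
	}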