include/nvgpu/channel.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478

/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_CHANNEL_H
#define NVGPU_CHANNEL_H

#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/timers.h>
#include <nvgpu/cond.h>
#include <nvgpu/atomic.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>

struct gk20a;
struct dbg_session_gk20a;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct nvgpu_channel_sync;
struct nvgpu_gpfifo_userdata;

/* Flags to be passed to nvgpu_channel_setup_bind() */
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR		(1U << 0U)
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC	(1U << 1U)
#define NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1U << 2U)
#define NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT		(1U << 3U)

/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT	(1U << 0U)
#define NVGPU_SUBMIT_FLAGS_FENCE_GET	(1U << 1U)
#define NVGPU_SUBMIT_FLAGS_HW_FORMAT	(1U << 2U)
#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE	(1U << 3U)
#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI	(1U << 4U)
#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1U << 5U)

/*
 * The binary format of 'struct nvgpu_channel_fence' introduced here
 * should match that of 'struct nvgpu_fence' defined in uapi header, since
 * this struct is intended to be a mirror copy of the uapi struct. This is
 * not a hard requirement though because of nvgpu_get_fence_args conversion
 * function.
 */
struct nvgpu_channel_fence {
	u32 id;
	u32 value;
};

/*
 * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
 * should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
 * this struct is intended to be a mirror copy of the uapi struct. This is
 * a rigid requirement because there's no conversion function and there are
 * memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
 * kern gpfifo (of type nvgpu_gpfifo_entry).
 */
struct nvgpu_gpfifo_entry {
	u32 entry0;
	u32 entry1;
};

struct gpfifo_desc {
	struct nvgpu_mem mem;
	u32 entry_num;

	u32 get;
	u32 put;

	bool wrap;

	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
	 * from userspace to pipe and then from pipe to gpu buffer */
	void *pipe;
};

struct nvgpu_setup_bind_args {
	u32 num_gpfifo_entries;
	u32 num_inflight_jobs;
	u32 userd_dmabuf_fd;
	u64 userd_dmabuf_offset;
	u32 gpfifo_dmabuf_fd;
	u64 gpfifo_dmabuf_offset;
	u32 work_submit_token;
	u32 flags;
};

struct notification {
	struct {
		u32 nanoseconds[2];
	} timestamp;
	u32 info32;
	u16 info16;
	u16 status;
};

struct priv_cmd_queue {
	struct nvgpu_mem mem;
	u32 size;	/* num of entries in words */
	u32 put;	/* put for priv cmd queue */
	u32 get;	/* get for priv cmd queue */
};

struct priv_cmd_entry {
	bool valid;
	struct nvgpu_mem *mem;
	u32 off;	/* offset in mem, in u32 entries */
	u64 gva;
	u32 get;	/* start of entry in queue */
	u32 size;	/* in words */
};

struct channel_gk20a_job {
	struct nvgpu_mapped_buf **mapped_buffers;
	int num_mapped_buffers;
	struct gk20a_fence *post_fence;
	struct priv_cmd_entry *wait_cmd;
	struct priv_cmd_entry *incr_cmd;
	struct nvgpu_list_node list;
};

static inline struct channel_gk20a_job *
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a_job *)
	((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
};

struct channel_gk20a_joblist {
	struct {
		bool enabled;
		unsigned int length;
		unsigned int put;
		unsigned int get;
		struct channel_gk20a_job *jobs;
		struct nvgpu_mutex read_lock;
	} pre_alloc;

	struct {
		struct nvgpu_list_node jobs;
		struct nvgpu_spinlock lock;
	} dynamic;

	/*
	 * Synchronize abort cleanup (when closing a channel) and job cleanup
	 * (asynchronously from worker) - protect from concurrent access when
	 * job resources are being freed.
	 */
	struct nvgpu_mutex cleanup_lock;
};

struct channel_gk20a_timeout {
	/* lock protects the running timer state */
	struct nvgpu_spinlock lock;
	struct nvgpu_timeout timer;
	bool running;
	u32 gp_get;
	u64 pb_get;

	/* lock not needed */
	u32 limit_ms;
	bool enabled;
	bool debug_dump;
};

/*
 * Track refcount actions, saving their stack traces. This number specifies how
 * many most recent actions are stored in a buffer. Set to 0 to disable. 128
 * should be enough to track moderately hard problems from the start.
 */
#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
/* Stack depth for the saved actions. */
#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8

/*
 * Because the puts and gets are not linked together explicitly (although they
 * should always come in pairs), it's not possible to tell which ref holder to
 * delete from the list when doing a put. So, just store some number of most
 * recent gets and puts in a ring buffer, to obtain a history.
 *
 * These are zeroed when a channel is closed, so a new one starts fresh.
 */

enum channel_gk20a_ref_action_type {
	channel_gk20a_ref_action_get,
	channel_gk20a_ref_action_put
};

#if GK20A_CHANNEL_REFCOUNT_TRACKING

#include <linux/stacktrace.h>

struct channel_gk20a_ref_action {
	enum channel_gk20a_ref_action_type type;
	s64 timestamp_ms;
	/*
	 * Many of these traces will be similar. Simpler to just capture
	 * duplicates than to have a separate database for the entries.
	 */
	struct stack_trace trace;
	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
};
#endif

/* this is the priv element of struct nvhost_channel */
struct channel_gk20a {
	struct gk20a *g; /* set only when channel is active */

	struct nvgpu_list_node free_chs;

	struct nvgpu_spinlock ref_obtain_lock;
	nvgpu_atomic_t ref_count;
	struct nvgpu_cond ref_count_dec_wq;
#if GK20A_CHANNEL_REFCOUNT_TRACKING
	/*
	 * Ring buffer for most recent refcount gets and puts. Protected by
	 * ref_actions_lock when getting or putting refs (i.e., adding
	 * entries), and when reading entries.
	 */
	struct channel_gk20a_ref_action ref_actions[
		GK20A_CHANNEL_REFCOUNT_TRACKING];
	size_t ref_actions_put; /* index of next write */
	struct nvgpu_spinlock ref_actions_lock;
#endif

	struct nvgpu_semaphore_int *hw_sema;

	nvgpu_atomic_t bound;

	u32 chid;
	u32 tsgid;
	pid_t pid;
	pid_t tgid;
	struct nvgpu_mutex ioctl_lock;

	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */

	struct channel_gk20a_joblist joblist;
	struct nvgpu_allocator fence_allocator;

	struct vm_gk20a *vm;

	struct gpfifo_desc gpfifo;

	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
	struct nvgpu_mem usermode_gpfifo;
	struct nvgpu_mem inst_block;

	u64 userd_iova;
	u64 userd_gpu_va;

	struct priv_cmd_queue priv_cmd_q;

	struct nvgpu_cond notifier_wq;
	struct nvgpu_cond semaphore_wq;

	/* kernel watchdog to kill stuck jobs */
	struct channel_gk20a_timeout timeout;

	/* for job cleanup handling in the background worker */
	struct nvgpu_list_node worker_item;

#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct {
		void *cyclestate_buffer;
		u32 cyclestate_buffer_size;
		struct nvgpu_mutex cyclestate_buffer_mutex;
	} cyclestate;

	struct nvgpu_mutex cs_client_mutex;
	struct gk20a_cs_snapshot_client *cs_client;
#endif
	struct nvgpu_mutex dbg_s_lock;
	struct nvgpu_list_node dbg_s_list;

	struct nvgpu_mutex sync_lock;
	struct nvgpu_channel_sync *sync;
	struct nvgpu_channel_sync *user_sync;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	u64 virt_ctx;
#endif

	struct nvgpu_mem ctx_header;

	struct nvgpu_spinlock ch_timedout_lock;
	bool ch_timedout;
	/* Any operating system specific data. */
	void *os_priv;

	u32 obj_class;	/* we support only one obj per channel */

	u32 timeout_accumulated_ms;
	u32 timeout_gpfifo_get;

	u32 subctx_id;
	u32 runqueue_sel;

	u32 timeout_ms_max;
	u32 runlist_id;

	bool mmu_nack_handled;
	bool referenceable;
	bool vpr;
	bool deterministic;
	/* deterministic, but explicitly idle and submits disallowed */
	bool deterministic_railgate_allowed;
	bool cde;
	bool usermode_submit_enabled;
	bool timeout_debug_dump;
	bool has_os_fence_framework_support;

	bool is_privileged_channel;

	/**
	 * MMU Debugger Mode is enabled for this channel if refcnt > 0
	 */
	u32 mmu_debug_mode_refcnt;
};

static inline struct channel_gk20a *
channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
		   ((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
};

static inline struct channel_gk20a *
channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
	   ((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
};

static inline struct channel_gk20a *
channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
	   ((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
};

static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
{
	return !!ch->vm;
}
int channel_gk20a_commit_va(struct channel_gk20a *c);
int gk20a_init_channel_support(struct gk20a *, u32 chid);

/* must be inside gk20a_busy()..gk20a_idle() */
void gk20a_channel_close(struct channel_gk20a *ch);
void __gk20a_channel_kill(struct channel_gk20a *ch);

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms, bool *progress);
void gk20a_disable_channel(struct channel_gk20a *ch);
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
			     struct priv_cmd_entry *entry);
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);

int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);

int gk20a_channel_suspend(struct gk20a *g);
int gk20a_channel_resume(struct gk20a *g);

void gk20a_channel_deterministic_idle(struct gk20a *g);
void gk20a_channel_deterministic_unidle(struct gk20a *g);

int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);

struct channel_gk20a *gk20a_get_channel_from_file(int fd);
void gk20a_channel_update(struct channel_gk20a *c);

/* returns ch if reference was obtained */
struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
						      const char *caller);
#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)


void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)

/* returns NULL if could not take a ref to the channel */
struct channel_gk20a *__must_check _gk20a_channel_from_id(struct gk20a *g,
		u32 chid, const char *caller);
#define gk20a_channel_from_id(g, chid) _gk20a_channel_from_id(g, chid, __func__)

int gk20a_wait_channel_idle(struct channel_gk20a *ch);

/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
		s32 runlist_id,
		bool is_privileged_channel,
		pid_t pid, pid_t tid);

int nvgpu_channel_setup_bind(struct channel_gk20a *c,
		struct nvgpu_setup_bind_args *args);

void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);

bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);
void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);

int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
		unsigned int timeslice_period,
		unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale);

void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
	struct nvgpu_cond *c, const char *caller, const char *counter_name);
int channel_gk20a_alloc_job(struct channel_gk20a *c,
		struct channel_gk20a_job **job_out);
void channel_gk20a_free_job(struct channel_gk20a *c,
		struct channel_gk20a_job *job);
u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
u32 nvgpu_gp_free_count(struct channel_gk20a *c);
int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct channel_gk20a_job *job,
				 bool skip_buffer_refcounting);
void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);
void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
					bool clean_all);

void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
u32 nvgpu_get_gpfifo_entry_size(void);

int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out,
				struct fifo_profile_gk20a *profile);

int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out);

#ifdef CONFIG_DEBUG_FS
void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
#else
static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
{
}
#endif

void gk20a_channel_set_timedout(struct channel_gk20a *ch);
bool gk20a_channel_check_timedout(struct channel_gk20a *ch);

#endif