Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/channel.h	417
1 file changed, 411 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 7434f0e7..6cca843e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -23,13 +23,418 @@
 #ifndef NVGPU_CHANNEL_H
 #define NVGPU_CHANNEL_H
 
-#include <nvgpu/types.h>
+#include <nvgpu/list.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/cond.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/allocator.h>
 
-#include "gk20a/gk20a.h"
-
-struct nvgpu_channel_fence;
+struct gk20a;
+struct dbg_session_gk20a;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
+struct gk20a_channel_sync;
+struct nvgpu_gpfifo_userdata;
+
+/* Flags to be passed to gk20a_channel_alloc_gpfifo() */
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR (1U << 0U)
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1U << 1U)
+#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1U << 2U)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1U << 3U)
+
+/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
+#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1U << 0U)
+#define NVGPU_SUBMIT_FLAGS_FENCE_GET (1U << 1U)
+#define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1U << 2U)
+#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1U << 3U)
+#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1U << 4U)
+#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1U << 5U)
+
+/*
+ * The binary format of 'struct nvgpu_channel_fence' introduced here
+ * should match that of 'struct nvgpu_fence' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * not a hard requirement though because of nvgpu_get_fence_args conversion
+ * function.
+ */
+struct nvgpu_channel_fence {
+	u32 id;
+	u32 value;
+};
+
+/*
+ * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
+ * should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * a rigid requirement because there's no conversion function and there are
+ * memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
+ * kern gpfifo (of type nvgpu_gpfifo_entry).
+ */
+struct nvgpu_gpfifo_entry {
+	u32 entry0;
+	u32 entry1;
+};
+
+struct gpfifo_desc {
+	struct nvgpu_mem mem;
+	u32 entry_num;
+
+	u32 get;
+	u32 put;
+
+	bool wrap;
+
+	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
+	 * from userspace to pipe and then from pipe to gpu buffer */
+	void *pipe;
+};
+
+struct nvgpu_gpfifo_args {
+	u32 num_entries;
+	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
+	u32 flags;
+};
+
+struct notification {
+	struct {
+		u32 nanoseconds[2];
+	} timestamp;
+	u32 info32;
+	u16 info16;
+	u16 status;
+};
+
+struct priv_cmd_queue {
+	struct nvgpu_mem mem;
+	u32 size;	/* num of entries in words */
+	u32 put;	/* put for priv cmd queue */
+	u32 get;	/* get for priv cmd queue */
+};
+
+struct priv_cmd_entry {
+	bool valid;
+	struct nvgpu_mem *mem;
+	u32 off;	/* offset in mem, in u32 entries */
+	u64 gva;
+	u32 get;	/* start of entry in queue */
+	u32 size;	/* in words */
+};
+
+struct channel_gk20a_job {
+	struct nvgpu_mapped_buf **mapped_buffers;
+	int num_mapped_buffers;
+	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
+	struct nvgpu_list_node list;
+};
+
+static inline struct channel_gk20a_job *
+channel_gk20a_job_from_list(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a_job *)
+		((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
+};
+
+struct channel_gk20a_joblist {
+	struct {
+		bool enabled;
+		unsigned int length;
+		unsigned int put;
+		unsigned int get;
+		struct channel_gk20a_job *jobs;
+		struct nvgpu_mutex read_lock;
+	} pre_alloc;
+
+	struct {
+		struct nvgpu_list_node jobs;
+		struct nvgpu_spinlock lock;
+	} dynamic;
+
+	/*
+	 * Synchronize abort cleanup (when closing a channel) and job cleanup
+	 * (asynchronously from worker) - protect from concurrent access when
+	 * job resources are being freed.
+	 */
+	struct nvgpu_mutex cleanup_lock;
+};
+
+struct channel_gk20a_timeout {
+	/* lock protects the running timer state */
+	struct nvgpu_raw_spinlock lock;
+	struct nvgpu_timeout timer;
+	bool running;
+	u32 gp_get;
+	u64 pb_get;
+
+	/* lock not needed */
+	u32 limit_ms;
+	bool enabled;
+	bool debug_dump;
+};
+
+/*
+ * Track refcount actions, saving their stack traces. This number specifies how
+ * many most recent actions are stored in a buffer. Set to 0 to disable. 128
+ * should be enough to track moderately hard problems from the start.
+ */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
+/* Stack depth for the saved actions. */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8
+
+/*
+ * Because the puts and gets are not linked together explicitly (although they
+ * should always come in pairs), it's not possible to tell which ref holder to
+ * delete from the list when doing a put. So, just store some number of most
+ * recent gets and puts in a ring buffer, to obtain a history.
+ *
+ * These are zeroed when a channel is closed, so a new one starts fresh.
+ */
+
+enum channel_gk20a_ref_action_type {
+	channel_gk20a_ref_action_get,
+	channel_gk20a_ref_action_put
+};
+
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+
+#include <linux/stacktrace.h>
+
+struct channel_gk20a_ref_action {
+	enum channel_gk20a_ref_action_type type;
+	s64 timestamp_ms;
+	/*
+	 * Many of these traces will be similar. Simpler to just capture
+	 * duplicates than to have a separate database for the entries.
+	 */
+	struct stack_trace trace;
+	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
+};
+#endif
+
+/* this is the priv element of struct nvhost_channel */
+struct channel_gk20a {
+	struct gk20a *g; /* set only when channel is active */
+
+	struct nvgpu_list_node free_chs;
+
+	struct nvgpu_spinlock ref_obtain_lock;
+	nvgpu_atomic_t ref_count;
+	struct nvgpu_cond ref_count_dec_wq;
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+	/*
+	 * Ring buffer for most recent refcount gets and puts. Protected by
+	 * ref_actions_lock when getting or putting refs (i.e., adding
+	 * entries), and when reading entries.
+	 */
+	struct channel_gk20a_ref_action ref_actions[
+		GK20A_CHANNEL_REFCOUNT_TRACKING];
+	size_t ref_actions_put; /* index of next write */
+	struct nvgpu_spinlock ref_actions_lock;
+#endif
+
+	struct nvgpu_semaphore_int *hw_sema;
+
+	nvgpu_atomic_t bound;
+
+	int chid;
+	int tsgid;
+	pid_t pid;
+	pid_t tgid;
+	struct nvgpu_mutex ioctl_lock;
+
+	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
+
+	struct channel_gk20a_joblist joblist;
+	struct nvgpu_allocator fence_allocator;
+
+	struct vm_gk20a *vm;
+
+	struct gpfifo_desc gpfifo;
+
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
+	struct nvgpu_mem usermode_gpfifo;
+	struct nvgpu_mem inst_block;
+
+	u64 userd_iova;
+	u64 userd_gpu_va;
+
+	struct priv_cmd_queue priv_cmd_q;
+
+	struct nvgpu_cond notifier_wq;
+	struct nvgpu_cond semaphore_wq;
+
+	/* kernel watchdog to kill stuck jobs */
+	struct channel_gk20a_timeout timeout;
+
+	/* for job cleanup handling in the background worker */
+	struct nvgpu_list_node worker_item;
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	struct {
+		void *cyclestate_buffer;
+		u32 cyclestate_buffer_size;
+		struct nvgpu_mutex cyclestate_buffer_mutex;
+	} cyclestate;
+
+	struct nvgpu_mutex cs_client_mutex;
+	struct gk20a_cs_snapshot_client *cs_client;
+#endif
+	struct nvgpu_mutex dbg_s_lock;
+	struct nvgpu_list_node dbg_s_list;
+
+	struct nvgpu_mutex sync_lock;
+	struct gk20a_channel_sync *sync;
+	struct gk20a_channel_sync *user_sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
+
+	struct nvgpu_mem ctx_header;
+
+	/* Any operating system specific data. */
+	void *os_priv;
+
+	u32 obj_class;	/* we support only one obj per channel */
+
+	u32 timeout_accumulated_ms;
+	u32 timeout_gpfifo_get;
+
+	u32 subctx_id;
+	u32 runqueue_sel;
+
+	u32 timeout_ms_max;
+	u32 runlist_id;
+
+	bool mmu_nack_handled;
+	bool has_timedout;
+	bool referenceable;
+	bool vpr;
+	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
+	bool cde;
+	bool usermode_submit_enabled;
+	bool timeout_debug_dump;
+	bool has_os_fence_framework_support;
+
+	bool is_privileged_channel;
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
+};
+
+static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
+{
+	return !!ch->vm;
+}
+int channel_gk20a_commit_va(struct channel_gk20a *c);
+int gk20a_init_channel_support(struct gk20a *, u32 chid);
+
+/* must be inside gk20a_busy()..gk20a_idle() */
+void gk20a_channel_close(struct channel_gk20a *ch);
+void __gk20a_channel_kill(struct channel_gk20a *ch);
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+		u32 timeout_delta_ms, bool *progress);
+void gk20a_disable_channel(struct channel_gk20a *ch);
+void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
+void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
+void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
+		struct priv_cmd_entry *entry);
+int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
+
+int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+
+int gk20a_channel_suspend(struct gk20a *g);
+int gk20a_channel_resume(struct gk20a *g);
+
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
+struct channel_gk20a *gk20a_get_channel_from_file(int fd);
+void gk20a_channel_update(struct channel_gk20a *c);
+
+/* returns ch if reference was obtained */
+struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
+		const char *caller);
+#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
+
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
+#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
+
+int gk20a_wait_channel_idle(struct channel_gk20a *ch);
+
+/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
+		s32 runlist_id,
+		bool is_privileged_channel,
+		pid_t pid, pid_t tid);
+
+int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_args *gpfifo_args);
+
+void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
+
+int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
+int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
+		unsigned int timeslice_period,
+		unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale);
+
+void gk20a_wait_until_counter_is_N(
+	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
+	struct nvgpu_cond *c, const char *caller, const char *counter_name);
+int channel_gk20a_alloc_job(struct channel_gk20a *c,
+		struct channel_gk20a_job **job_out);
+void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
+u32 nvgpu_gp_free_count(struct channel_gk20a *c);
+int gk20a_channel_add_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job,
+		bool skip_buffer_refcounting);
+void free_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *e);
+void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+		bool clean_all);
+
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
+u32 nvgpu_get_gpfifo_entry_size(void);
 
 int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				struct nvgpu_gpfifo_userdata userdata,
@@ -47,9 +452,9 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 		struct gk20a_fence **fence_out);
 
 #ifdef CONFIG_DEBUG_FS
-void trace_write_pushbuffers(struct channel_gk20a *c, int count);
+void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
 #else
-static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count)
+static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
 }
 #endif
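
The header comments above describe the channel reference counting: _gk20a_channel_get() "returns ch if reference was obtained", and gets and puts are expected to come in pairs. A minimal caller-side sketch of that pairing, assuming a hypothetical helper (example_poke_channel and the -ENODEV return value are illustrative, not part of this change):

	/*
	 * Sketch only: take a reference before touching a channel that may be
	 * torn down concurrently, and drop it when done. Per the comment in
	 * channel.h, gk20a_channel_get() returns the channel only when a
	 * reference was obtained; assume it returns NULL otherwise.
	 */
	static int example_poke_channel(struct channel_gk20a *ch)
	{
		struct channel_gk20a *ref = gk20a_channel_get(ch);

		if (ref == NULL) {
			/* channel not referenceable (being closed); bail out */
			return -ENODEV;
		}

		/* ... operate on the referenced channel here ... */

		gk20a_channel_put(ref);	/* every successful get needs a put */
		return 0;
	}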