From 34d552957dd0e3e3363067fc9b9af64281d29396 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Fri, 31 Aug 2018 12:59:37 +0300
Subject: gpu: nvgpu: move channel header to common

channel_gk20a is clear of chip specifics and of most dependencies, so
move it under the common directory.

Jira NVGPU-967

Change-Id: I41f2160b96d4ec84064288ecc22bb360e82352df
Signed-off-by: Konsta Holtta
Reviewed-on: https://git-master.nvidia.com/r/1810578
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/include/nvgpu/channel.h | 417 +++++++++++++++++++++++++++++-
 1 file changed, 411 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/nvgpu/include')

diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 7434f0e7..6cca843e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -23,13 +23,418 @@
 #ifndef NVGPU_CHANNEL_H
 #define NVGPU_CHANNEL_H
 
-#include <nvgpu/types.h>
+#include <nvgpu/list.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/cond.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/allocator.h>
 
-#include "gk20a/gk20a.h"
-
-struct nvgpu_channel_fence;
+struct gk20a;
+struct dbg_session_gk20a;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
+struct gk20a_channel_sync;
+struct nvgpu_gpfifo_userdata;
+
+/* Flags to be passed to gk20a_channel_alloc_gpfifo() */
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR			(1U << 0U)
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1U << 1U)
+#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1U << 2U)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1U << 3U)
+
+/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
+#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT			(1U << 0U)
+#define NVGPU_SUBMIT_FLAGS_FENCE_GET			(1U << 1U)
+#define NVGPU_SUBMIT_FLAGS_HW_FORMAT			(1U << 2U)
+#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE			(1U << 3U)
+#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI			(1U << 4U)
+#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1U << 5U)
+
+/*
+ * The binary format of 'struct nvgpu_channel_fence' introduced here
+ * should match that of 'struct nvgpu_fence' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * not a hard requirement though because of nvgpu_get_fence_args conversion
+ * function.
+ */
+struct nvgpu_channel_fence {
+	u32 id;
+	u32 value;
+};
+
+/*
+ * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
+ * should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * a rigid requirement because there's no conversion function and there are
+ * memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
+ * kern gpfifo (of type nvgpu_gpfifo_entry).
+ */
+struct nvgpu_gpfifo_entry {
+	u32 entry0;
+	u32 entry1;
+};
+
+struct gpfifo_desc {
+	struct nvgpu_mem mem;
+	u32 entry_num;
+
+	u32 get;
+	u32 put;
+
+	bool wrap;
+
+	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
+	 * from userspace to pipe and then from pipe to gpu buffer */
+	void *pipe;
+};
+
+struct nvgpu_gpfifo_args {
+	u32 num_entries;
+	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
+	u32 flags;
+};
+
+struct notification {
+	struct {
+		u32 nanoseconds[2];
+	} timestamp;
+	u32 info32;
+	u16 info16;
+	u16 status;
+};
+
+struct priv_cmd_queue {
+	struct nvgpu_mem mem;
+	u32 size;	/* num of entries in words */
+	u32 put;	/* put for priv cmd queue */
+	u32 get;	/* get for priv cmd queue */
+};
+
+struct priv_cmd_entry {
+	bool valid;
+	struct nvgpu_mem *mem;
+	u32 off;	/* offset in mem, in u32 entries */
+	u64 gva;
+	u32 get;	/* start of entry in queue */
+	u32 size;	/* in words */
+};
+
+struct channel_gk20a_job {
+	struct nvgpu_mapped_buf **mapped_buffers;
+	int num_mapped_buffers;
+	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
+	struct nvgpu_list_node list;
+};
+
+static inline struct channel_gk20a_job *
+channel_gk20a_job_from_list(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a_job *)
+		((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
+};
+
+struct channel_gk20a_joblist {
+	struct {
+		bool enabled;
+		unsigned int length;
+		unsigned int put;
+		unsigned int get;
+		struct channel_gk20a_job *jobs;
+		struct nvgpu_mutex read_lock;
+	} pre_alloc;
+
+	struct {
+		struct nvgpu_list_node jobs;
+		struct nvgpu_spinlock lock;
+	} dynamic;
+
+	/*
+	 * Synchronize abort cleanup (when closing a channel) and job cleanup
+	 * (asynchronously from worker) - protect from concurrent access when
+	 * job resources are being freed.
+	 */
+	struct nvgpu_mutex cleanup_lock;
+};
+
+struct channel_gk20a_timeout {
+	/* lock protects the running timer state */
+	struct nvgpu_raw_spinlock lock;
+	struct nvgpu_timeout timer;
+	bool running;
+	u32 gp_get;
+	u64 pb_get;
+
+	/* lock not needed */
+	u32 limit_ms;
+	bool enabled;
+	bool debug_dump;
+};
+
+/*
+ * Track refcount actions, saving their stack traces. This number specifies how
+ * many most recent actions are stored in a buffer. Set to 0 to disable. 128
+ * should be enough to track moderately hard problems from the start.
+ */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
+/* Stack depth for the saved actions. */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8
+
+/*
+ * Because the puts and gets are not linked together explicitly (although they
+ * should always come in pairs), it's not possible to tell which ref holder to
+ * delete from the list when doing a put. So, just store some number of most
+ * recent gets and puts in a ring buffer, to obtain a history.
+ *
+ * These are zeroed when a channel is closed, so a new one starts fresh.
+ */
+
+enum channel_gk20a_ref_action_type {
+	channel_gk20a_ref_action_get,
+	channel_gk20a_ref_action_put
+};
+
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+
+#include <linux/stacktrace.h>
+
+struct channel_gk20a_ref_action {
+	enum channel_gk20a_ref_action_type type;
+	s64 timestamp_ms;
+	/*
+	 * Many of these traces will be similar. Simpler to just capture
+	 * duplicates than to have a separate database for the entries.
+	 */
+	struct stack_trace trace;
+	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
+};
+#endif
+
+/* this is the priv element of struct nvhost_channel */
+struct channel_gk20a {
+	struct gk20a *g; /* set only when channel is active */
+
+	struct nvgpu_list_node free_chs;
+
+	struct nvgpu_spinlock ref_obtain_lock;
+	nvgpu_atomic_t ref_count;
+	struct nvgpu_cond ref_count_dec_wq;
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+	/*
+	 * Ring buffer for most recent refcount gets and puts. Protected by
+	 * ref_actions_lock when getting or putting refs (i.e., adding
+	 * entries), and when reading entries.
+	 */
+	struct channel_gk20a_ref_action ref_actions[
+		GK20A_CHANNEL_REFCOUNT_TRACKING];
+	size_t ref_actions_put; /* index of next write */
+	struct nvgpu_spinlock ref_actions_lock;
+#endif
+
+	struct nvgpu_semaphore_int *hw_sema;
+
+	nvgpu_atomic_t bound;
+
+	int chid;
+	int tsgid;
+	pid_t pid;
+	pid_t tgid;
+	struct nvgpu_mutex ioctl_lock;
+
+	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
+
+	struct channel_gk20a_joblist joblist;
+	struct nvgpu_allocator fence_allocator;
+
+	struct vm_gk20a *vm;
+
+	struct gpfifo_desc gpfifo;
+
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
+	struct nvgpu_mem usermode_gpfifo;
+	struct nvgpu_mem inst_block;
+
+	u64 userd_iova;
+	u64 userd_gpu_va;
+
+	struct priv_cmd_queue priv_cmd_q;
+
+	struct nvgpu_cond notifier_wq;
+	struct nvgpu_cond semaphore_wq;
+
+	/* kernel watchdog to kill stuck jobs */
+	struct channel_gk20a_timeout timeout;
+
+	/* for job cleanup handling in the background worker */
+	struct nvgpu_list_node worker_item;
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	struct {
+		void *cyclestate_buffer;
+		u32 cyclestate_buffer_size;
+		struct nvgpu_mutex cyclestate_buffer_mutex;
+	} cyclestate;
+
+	struct nvgpu_mutex cs_client_mutex;
+	struct gk20a_cs_snapshot_client *cs_client;
+#endif
+	struct nvgpu_mutex dbg_s_lock;
+	struct nvgpu_list_node dbg_s_list;
+
+	struct nvgpu_mutex sync_lock;
+	struct gk20a_channel_sync *sync;
+	struct gk20a_channel_sync *user_sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
+
+	struct nvgpu_mem ctx_header;
+
+	/* Any operating system specific data. */
+	void *os_priv;
+
+	u32 obj_class;	/* we support only one obj per channel */
+
+	u32 timeout_accumulated_ms;
+	u32 timeout_gpfifo_get;
+
+	u32 subctx_id;
+	u32 runqueue_sel;
+
+	u32 timeout_ms_max;
+	u32 runlist_id;
+
+	bool mmu_nack_handled;
+	bool has_timedout;
+	bool referenceable;
+	bool vpr;
+	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
+	bool cde;
+	bool usermode_submit_enabled;
+	bool timeout_debug_dump;
+	bool has_os_fence_framework_support;
+
+	bool is_privileged_channel;
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
+};
+
+static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
+{
+	return !!ch->vm;
+}
+int channel_gk20a_commit_va(struct channel_gk20a *c);
+int gk20a_init_channel_support(struct gk20a *, u32 chid);
+
+/* must be inside gk20a_busy()..gk20a_idle() */
+void gk20a_channel_close(struct channel_gk20a *ch);
+void __gk20a_channel_kill(struct channel_gk20a *ch);
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+		u32 timeout_delta_ms, bool *progress);
+void gk20a_disable_channel(struct channel_gk20a *ch);
+void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
+void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
+void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
+		struct priv_cmd_entry *entry);
+int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
+
+int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+
+int gk20a_channel_suspend(struct gk20a *g);
+int gk20a_channel_resume(struct gk20a *g);
+
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
+struct channel_gk20a *gk20a_get_channel_from_file(int fd);
+void gk20a_channel_update(struct channel_gk20a *c);
+
+/* returns ch if reference was obtained */
+struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
+						      const char *caller);
+#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
+#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
+
+int gk20a_wait_channel_idle(struct channel_gk20a *ch);
+
+/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
+		s32 runlist_id,
+		bool is_privileged_channel,
+		pid_t pid, pid_t tid);
+
+int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_args *gpfifo_args);
+
+void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
+
+int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
+int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
+		unsigned int timeslice_period,
+		unsigned int *__timeslice_timeout,
+		unsigned int *__timeslice_scale);
+
+void gk20a_wait_until_counter_is_N(
+	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
+	struct nvgpu_cond *c, const char *caller, const char *counter_name);
+int channel_gk20a_alloc_job(struct channel_gk20a *c,
+		struct channel_gk20a_job **job_out);
+void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
+u32 nvgpu_gp_free_count(struct channel_gk20a *c);
+int gk20a_channel_add_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job,
+		bool skip_buffer_refcounting);
+void free_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *e);
+void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+		bool clean_all);
+
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
+u32 nvgpu_get_gpfifo_entry_size(void);
 
 int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				struct nvgpu_gpfifo_userdata userdata,
@@ -47,9 +452,9 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 				       struct gk20a_fence **fence_out);
 
 #ifdef CONFIG_DEBUG_FS
-void trace_write_pushbuffers(struct channel_gk20a *c, int count);
+void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
 #else
-static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count)
+static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
 }
 #endif
-- 
cgit v1.2.2
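
For context on the reference-counting API this header declares: gk20a_channel_get() returns the channel only when a reference was actually obtained (NULL otherwise, e.g. while the channel is being torn down), and every successful get must be balanced by a gk20a_channel_put(). A minimal sketch of a caller follows; example_poke_channel() is hypothetical and not part of this patch, while the gk20a_channel_* calls are the ones declared above:

	/*
	 * Sketch: touch a channel that may be closed concurrently.
	 * gk20a_channel_get() takes a reference and returns ch, or NULL
	 * if the channel is no longer referenceable.
	 */
	static void example_poke_channel(struct channel_gk20a *ch)
	{
		if (gk20a_channel_get(ch) == NULL)
			return;

		/* The held reference keeps teardown from completing. */
		gk20a_channel_update(ch);

		/* Every successful get is paired with exactly one put. */
		gk20a_channel_put(ch);
	}

Because the macros pass __func__ as the caller string, building with GK20A_CHANNEL_REFCOUNT_TRACKING set to a nonzero value records each such get and put in the ref_actions ring buffer described above, which is what makes unbalanced pairs debuggable after the fact.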