From 34d552957dd0e3e3363067fc9b9af64281d29396 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Fri, 31 Aug 2018 12:59:37 +0300
Subject: gpu: nvgpu: move channel header to common

channel_gk20a is clear of chip specifics and of most dependencies, so
move it under the common directory.

Jira NVGPU-967

Change-Id: I41f2160b96d4ec84064288ecc22bb360e82352df
Signed-off-by: Konsta Holtta
Reviewed-on: https://git-master.nvidia.com/r/1810578
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/include/nvgpu/channel.h | 417 +++++++++++++++++++++++++++++-
 1 file changed, 411 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/nvgpu/include')

diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 7434f0e7..6cca843e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -23,13 +23,418 @@
 #ifndef NVGPU_CHANNEL_H
 #define NVGPU_CHANNEL_H
 
-#include <nvgpu/types.h>
+#include <nvgpu/list.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/cond.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/allocator.h>
 
-#include "gk20a/gk20a.h"
-
-struct nvgpu_channel_fence;
+struct gk20a;
+struct dbg_session_gk20a;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
+struct gk20a_channel_sync;
+struct nvgpu_gpfifo_userdata;
+
+/* Flags to be passed to gk20a_channel_alloc_gpfifo() */
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR			(1U << 0U)
+#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1U << 1U)
+#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1U << 2U)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1U << 3U)
+
+/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
+#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT			(1U << 0U)
+#define NVGPU_SUBMIT_FLAGS_FENCE_GET			(1U << 1U)
+#define NVGPU_SUBMIT_FLAGS_HW_FORMAT			(1U << 2U)
+#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE			(1U << 3U)
+#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI			(1U << 4U)
+#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1U << 5U)
+
+/*
+ * The binary format of 'struct nvgpu_channel_fence' introduced here
+ * should match that of 'struct nvgpu_fence' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * not a hard requirement though because of nvgpu_get_fence_args conversion
+ * function.
+ */
+struct nvgpu_channel_fence {
+	u32 id;
+	u32 value;
+};
+
+/*
+ * The binary format of 'struct nvgpu_gpfifo_entry' introduced here
+ * should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
+ * this struct is intended to be a mirror copy of the uapi struct. This is
+ * a rigid requirement because there's no conversion function and there are
+ * memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
+ * kern gpfifo (of type nvgpu_gpfifo_entry).
+ */
+struct nvgpu_gpfifo_entry {
+	u32 entry0;
+	u32 entry1;
+};
+
+struct gpfifo_desc {
+	struct nvgpu_mem mem;
+	u32 entry_num;
+
+	u32 get;
+	u32 put;
+
+	bool wrap;
+
+	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
+	 * from userspace to pipe and then from pipe to gpu buffer */
+	void *pipe;
+};
+
+struct nvgpu_gpfifo_args {
+	u32 num_entries;
+	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
+	u32 flags;
+};
+
+struct notification {
+	struct {
+		u32 nanoseconds[2];
+	} timestamp;
+	u32 info32;
+	u16 info16;
+	u16 status;
+};
+
+struct priv_cmd_queue {
+	struct nvgpu_mem mem;
+	u32 size;	/* num of entries in words */
+	u32 put;	/* put for priv cmd queue */
+	u32 get;	/* get for priv cmd queue */
+};
+
+struct priv_cmd_entry {
+	bool valid;
+	struct nvgpu_mem *mem;
+	u32 off;	/* offset in mem, in u32 entries */
+	u64 gva;
+	u32 get;	/* start of entry in queue */
+	u32 size;	/* in words */
+};
+
+struct channel_gk20a_job {
+	struct nvgpu_mapped_buf **mapped_buffers;
+	int num_mapped_buffers;
+	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
+	struct nvgpu_list_node list;
+};
+
+static inline struct channel_gk20a_job *
+channel_gk20a_job_from_list(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a_job *)
+		((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
+};
+
+struct channel_gk20a_joblist {
+	struct {
+		bool enabled;
+		unsigned int length;
+		unsigned int put;
+		unsigned int get;
+		struct channel_gk20a_job *jobs;
+		struct nvgpu_mutex read_lock;
+	} pre_alloc;
+
+	struct {
+		struct nvgpu_list_node jobs;
+		struct nvgpu_spinlock lock;
+	} dynamic;
+
+	/*
+	 * Synchronize abort cleanup (when closing a channel) and job cleanup
+	 * (asynchronously from worker) - protect from concurrent access when
+	 * job resources are being freed.
+	 */
+	struct nvgpu_mutex cleanup_lock;
+};
+
+struct channel_gk20a_timeout {
+	/* lock protects the running timer state */
+	struct nvgpu_raw_spinlock lock;
+	struct nvgpu_timeout timer;
+	bool running;
+	u32 gp_get;
+	u64 pb_get;
+
+	/* lock not needed */
+	u32 limit_ms;
+	bool enabled;
+	bool debug_dump;
+};
+
+/*
+ * Track refcount actions, saving their stack traces. This number specifies how
+ * many most recent actions are stored in a buffer. Set to 0 to disable. 128
+ * should be enough to track moderately hard problems from the start.
+ */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
+/* Stack depth for the saved actions. */
+#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8
+
+/*
+ * Because the puts and gets are not linked together explicitly (although they
+ * should always come in pairs), it's not possible to tell which ref holder to
+ * delete from the list when doing a put. So, just store some number of most
+ * recent gets and puts in a ring buffer, to obtain a history.
+ *
+ * These are zeroed when a channel is closed, so a new one starts fresh.
+ */
+
+enum channel_gk20a_ref_action_type {
+	channel_gk20a_ref_action_get,
+	channel_gk20a_ref_action_put
+};
+
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+
+#include <linux/stacktrace.h>
+
+struct channel_gk20a_ref_action {
+	enum channel_gk20a_ref_action_type type;
+	s64 timestamp_ms;
+	/*
+	 * Many of these traces will be similar. Simpler to just capture
+	 * duplicates than to have a separate database for the entries.
+	 */
+	struct stack_trace trace;
+	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
+};
+#endif
+
+/* this is the priv element of struct nvhost_channel */
+struct channel_gk20a {
+	struct gk20a *g; /* set only when channel is active */
+
+	struct nvgpu_list_node free_chs;
+
+	struct nvgpu_spinlock ref_obtain_lock;
+	nvgpu_atomic_t ref_count;
+	struct nvgpu_cond ref_count_dec_wq;
+#if GK20A_CHANNEL_REFCOUNT_TRACKING
+	/*
+	 * Ring buffer for most recent refcount gets and puts. Protected by
+	 * ref_actions_lock when getting or putting refs (i.e., adding
+	 * entries), and when reading entries.
+	 */
+	struct channel_gk20a_ref_action ref_actions[
+		GK20A_CHANNEL_REFCOUNT_TRACKING];
+	size_t ref_actions_put; /* index of next write */
+	struct nvgpu_spinlock ref_actions_lock;
+#endif
+
+	struct nvgpu_semaphore_int *hw_sema;
+
+	nvgpu_atomic_t bound;
+
+	int chid;
+	int tsgid;
+	pid_t pid;
+	pid_t tgid;
+	struct nvgpu_mutex ioctl_lock;
+
+	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
+
+	struct channel_gk20a_joblist joblist;
+	struct nvgpu_allocator fence_allocator;
+
+	struct vm_gk20a *vm;
+
+	struct gpfifo_desc gpfifo;
+
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
+	struct nvgpu_mem usermode_gpfifo;
+	struct nvgpu_mem inst_block;
+
+	u64 userd_iova;
+	u64 userd_gpu_va;
+
+	struct priv_cmd_queue priv_cmd_q;
+
+	struct nvgpu_cond notifier_wq;
+	struct nvgpu_cond semaphore_wq;
+
+	/* kernel watchdog to kill stuck jobs */
+	struct channel_gk20a_timeout timeout;
+
+	/* for job cleanup handling in the background worker */
+	struct nvgpu_list_node worker_item;
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	struct {
+		void *cyclestate_buffer;
+		u32 cyclestate_buffer_size;
+		struct nvgpu_mutex cyclestate_buffer_mutex;
+	} cyclestate;
+
+	struct nvgpu_mutex cs_client_mutex;
+	struct gk20a_cs_snapshot_client *cs_client;
+#endif
+	struct nvgpu_mutex dbg_s_lock;
+	struct nvgpu_list_node dbg_s_list;
+
+	struct nvgpu_mutex sync_lock;
+	struct gk20a_channel_sync *sync;
+	struct gk20a_channel_sync *user_sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
+
+	struct nvgpu_mem ctx_header;
+
+	/* Any operating system specific data. */
+	void *os_priv;
+
+	u32 obj_class;	/* we support only one obj per channel */
+
+	u32 timeout_accumulated_ms;
+	u32 timeout_gpfifo_get;
+
+	u32 subctx_id;
+	u32 runqueue_sel;
+
+	u32 timeout_ms_max;
+	u32 runlist_id;
+
+	bool mmu_nack_handled;
+	bool has_timedout;
+	bool referenceable;
+	bool vpr;
+	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
+	bool cde;
+	bool usermode_submit_enabled;
+	bool timeout_debug_dump;
+	bool has_os_fence_framework_support;
+
+	bool is_privileged_channel;
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
+};
+
+static inline struct channel_gk20a *
+channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct channel_gk20a *)
+		((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
+};
+
+static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
+{
+	return !!ch->vm;
+}
+int channel_gk20a_commit_va(struct channel_gk20a *c);
+int gk20a_init_channel_support(struct gk20a *, u32 chid);
+
+/* must be inside gk20a_busy()..gk20a_idle() */
+void gk20a_channel_close(struct channel_gk20a *ch);
+void __gk20a_channel_kill(struct channel_gk20a *ch);
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+		u32 timeout_delta_ms, bool *progress);
+void gk20a_disable_channel(struct channel_gk20a *ch);
+void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
+void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
+void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
+		struct priv_cmd_entry *entry);
+int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
+
+int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
+
+int gk20a_channel_suspend(struct gk20a *g);
+int gk20a_channel_resume(struct gk20a *g);
+
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
+struct channel_gk20a *gk20a_get_channel_from_file(int fd);
+void gk20a_channel_update(struct channel_gk20a *c);
+
+/* returns ch if reference was obtained */
+struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
+						      const char *caller);
+#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
+#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
+
+int gk20a_wait_channel_idle(struct channel_gk20a *ch);
+
+/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
+		s32 runlist_id,
+		bool is_privileged_channel,
+		pid_t pid, pid_t tid);
+
+int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_args *gpfifo_args);
+
+void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
+
+int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
+int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
+		unsigned int timeslice_period,
+		unsigned int *__timeslice_timeout,
+		unsigned int *__timeslice_scale);
+
+void gk20a_wait_until_counter_is_N(
+	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
+	struct nvgpu_cond *c, const char *caller, const char *counter_name);
+int channel_gk20a_alloc_job(struct channel_gk20a *c,
+		struct channel_gk20a_job **job_out);
+void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
+u32 nvgpu_gp_free_count(struct channel_gk20a *c);
+int gk20a_channel_add_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job,
+		bool skip_buffer_refcounting);
+void free_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *e);
+void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+		bool clean_all);
+
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
+u32 nvgpu_get_gpfifo_entry_size(void);
 
 int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				struct nvgpu_gpfifo_userdata userdata,
@@ -47,9 +452,9 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 				       struct gk20a_fence **fence_out);
 
 #ifdef CONFIG_DEBUG_FS
-void trace_write_pushbuffers(struct channel_gk20a *c, int count);
+void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
 #else
-static inline void trace_write_pushbuffers(struct channel_gk20a *c, int count)
+static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
 }
 #endif
-- 
cgit v1.2.2
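
For context on the reference-counting API this header declares: gk20a_channel_get() returns the channel only when a reference was actually obtained (NULL otherwise, e.g. while the channel is being torn down), and every successful get must be balanced by a gk20a_channel_put(). A minimal sketch of a caller follows; example_poke_channel() is hypothetical and not part of this patch, while the gk20a_channel_* calls are the ones declared above:

	/*
	 * Sketch: touch a channel that may be closed concurrently.
	 * gk20a_channel_get() takes a reference and returns ch, or NULL
	 * if the channel is no longer referenceable.
	 */
	static void example_poke_channel(struct channel_gk20a *ch)
	{
		if (gk20a_channel_get(ch) == NULL)
			return;

		/* The held reference keeps teardown from completing. */
		gk20a_channel_update(ch);

		/* Every successful get is paired with exactly one put. */
		gk20a_channel_put(ch);
	}

Because the macros pass __func__ as the caller string, building with GK20A_CHANNEL_REFCOUNT_TRACKING set to a nonzero value records each such get and put in the ref_actions ring buffer described above, which is what makes unbalanced pairs debuggable after the fact.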