/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef NVGPU_CHANNEL_H
#define NVGPU_CHANNEL_H

#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/timers.h>
#include <nvgpu/cond.h>
#include <nvgpu/atomic.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>

struct gk20a;
struct dbg_session_gk20a;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct nvgpu_channel_sync;
struct nvgpu_gpfifo_userdata;

/* Flags to be passed to nvgpu_channel_setup_bind() */
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR		(1U << 0U)
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC	(1U << 1U)
#define NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1U << 2U)
#define NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT		(1U << 3U)

/* Flags to be passed to nvgpu_submit_channel_gpfifo() */
#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT			(1U << 0U)
#define NVGPU_SUBMIT_FLAGS_FENCE_GET			(1U << 1U)
#define NVGPU_SUBMIT_FLAGS_HW_FORMAT			(1U << 2U)
#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE			(1U << 3U)
#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI			(1U << 4U)
#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1U << 5U)

/*
 * The binary format of 'struct nvgpu_channel_fence' introduced here should
 * match that of 'struct nvgpu_fence' defined in the uapi header, since this
 * struct is intended to be a mirror copy of the uapi struct. This is not a
 * hard requirement, though, because the nvgpu_get_fence_args conversion
 * function exists.
 */
struct nvgpu_channel_fence {
	u32 id;
	u32 value;
};
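
/*
 * Example (illustrative sketch only, not part of this interface): a
 * kernel-internal submit that waits on an existing fence and requests a new
 * one back combines the NVGPU_SUBMIT_FLAGS_* values above with this struct.
 * nvgpu_submit_channel_gpfifo_kernel() and struct nvgpu_gpfifo_entry are
 * declared further down in this header; the local variables are placeholders
 * and error handling is omitted.
 *
 *	struct nvgpu_channel_fence wait = { .id = in_id, .value = in_value };
 *	struct gk20a_fence *out_fence = NULL;
 *	u32 flags = NVGPU_SUBMIT_FLAGS_FENCE_WAIT | NVGPU_SUBMIT_FLAGS_FENCE_GET;
 *
 *	err = nvgpu_submit_channel_gpfifo_kernel(c, gpfifo_entries,
 *			num_entries, flags, &wait, &out_fence);
 */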

/*
 * The binary format of 'struct nvgpu_gpfifo_entry' introduced here should
 * match that of 'struct nvgpu_gpfifo' defined in the uapi header, since this
 * struct is intended to be a mirror copy of the uapi struct. This is a rigid
 * requirement because there is no conversion function and there are
 * memcpy()s between the user gpfifo (of type nvgpu_gpfifo) and the kernel
 * gpfifo (of type nvgpu_gpfifo_entry).
 */
struct nvgpu_gpfifo_entry {
	u32 entry0;
	u32 entry1;
};

struct gpfifo_desc {
	struct nvgpu_mem mem;
	u32 entry_num;

	u32 get;
	u32 put;

	bool wrap;

	/*
	 * If the gpfifo lives in vidmem or is forced to go via PRAMIN, first
	 * copy from userspace to the pipe and then from the pipe to the GPU
	 * buffer.
	 */
	void *pipe;
};

struct nvgpu_setup_bind_args {
	u32 num_gpfifo_entries;
	u32 num_inflight_jobs;
	u32 userd_dmabuf_fd;
	u64 userd_dmabuf_offset;
	u32 gpfifo_dmabuf_fd;
	u64 gpfifo_dmabuf_offset;
	u32 work_submit_token;
	u32 flags;
};

struct notification {
	struct {
		u32 nanoseconds[2];
	} timestamp;
	u32 info32;
	u16 info16;
	u16 status;
};

struct priv_cmd_queue {
	struct nvgpu_mem mem;
	u32 size;	/* num of entries in words */
	u32 put;	/* put for priv cmd queue */
	u32 get;	/* get for priv cmd queue */
};

struct priv_cmd_entry {
	bool valid;
	struct nvgpu_mem *mem;
	u32 off;	/* offset in mem, in u32 entries */
	u64 gva;
	u32 get;	/* start of entry in queue */
	u32 size;	/* in words */
};

struct channel_gk20a_job {
	struct nvgpu_mapped_buf **mapped_buffers;
	int num_mapped_buffers;
	struct gk20a_fence *post_fence;
	struct priv_cmd_entry *wait_cmd;
	struct priv_cmd_entry *incr_cmd;
	struct nvgpu_list_node list;
};

static inline struct channel_gk20a_job *
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a_job *)
		((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
}

struct channel_gk20a_joblist {
	struct {
		bool enabled;
		unsigned int length;
		unsigned int put;
		unsigned int get;
		struct channel_gk20a_job *jobs;
		struct nvgpu_mutex read_lock;
	} pre_alloc;

	struct {
		struct nvgpu_list_node jobs;
		struct nvgpu_spinlock lock;
	} dynamic;

	/*
	 * Synchronize abort cleanup (when closing a channel) and job cleanup
	 * (asynchronously from the worker) - protect from concurrent access
	 * when job resources are being freed.
	 */
	struct nvgpu_mutex cleanup_lock;
};

struct channel_gk20a_timeout {
	/* lock protects the running timer state */
	struct nvgpu_spinlock lock;
	struct nvgpu_timeout timer;
	bool running;
	u32 gp_get;
	u64 pb_get;

	/* lock not needed */
	u32 limit_ms;
	bool enabled;
	bool debug_dump;
};

/*
 * Track refcount actions, saving their stack traces. This number specifies
 * how many of the most recent actions are stored in a buffer. Set to 0 to
 * disable. 128 should be enough to track moderately hard problems from the
 * start.
 */
#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
/* Stack depth for the saved actions. */
#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8

/*
 * Because the puts and gets are not linked together explicitly (although
 * they should always come in pairs), it's not possible to tell which ref
 * holder to delete from the list when doing a put. So, just store some
 * number of the most recent gets and puts in a ring buffer, to obtain a
 * history.
 *
 * These are zeroed when a channel is closed, so a new one starts fresh.
 */
enum channel_gk20a_ref_action_type {
	channel_gk20a_ref_action_get,
	channel_gk20a_ref_action_put
};

#if GK20A_CHANNEL_REFCOUNT_TRACKING
#include <linux/stacktrace.h>

struct channel_gk20a_ref_action {
	enum channel_gk20a_ref_action_type type;
	s64 timestamp_ms;
	/*
	 * Many of these traces will be similar. Simpler to just capture
	 * duplicates than to have a separate database for the entries.
	 */
	struct stack_trace trace;
	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
};
#endif
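
/*
 * Example (illustrative sketch only): the get/put history above records the
 * pairing that callers are expected to follow. A typical lookup takes a
 * reference and drops it when done; gk20a_channel_from_id() and
 * gk20a_channel_put() are declared later in this header, and the work done
 * while the reference is held is left out.
 *
 *	struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 *
 *	if (ch != NULL) {
 *		// ... use the channel; it cannot be freed while referenced
 *		gk20a_channel_put(ch);
 *	}
 */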

/* this is the priv element of struct nvhost_channel */
struct channel_gk20a {
	struct gk20a *g; /* set only when channel is active */

	struct nvgpu_list_node free_chs;

	struct nvgpu_spinlock ref_obtain_lock;
	nvgpu_atomic_t ref_count;
	struct nvgpu_cond ref_count_dec_wq;
#if GK20A_CHANNEL_REFCOUNT_TRACKING
	/*
	 * Ring buffer for most recent refcount gets and puts. Protected by
	 * ref_actions_lock when getting or putting refs (i.e., adding
	 * entries), and when reading entries.
	 */
	struct channel_gk20a_ref_action ref_actions[
		GK20A_CHANNEL_REFCOUNT_TRACKING];
	size_t ref_actions_put; /* index of next write */
	struct nvgpu_spinlock ref_actions_lock;
#endif

	struct nvgpu_semaphore_int *hw_sema;

	nvgpu_atomic_t bound;

	u32 chid;
	u32 tsgid;
	pid_t pid;
	pid_t tgid;
	struct nvgpu_mutex ioctl_lock;

	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */

	struct channel_gk20a_joblist joblist;
	struct nvgpu_allocator fence_allocator;

	struct vm_gk20a *vm;

	struct gpfifo_desc gpfifo;
	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
	struct nvgpu_mem usermode_gpfifo;
	struct nvgpu_mem inst_block;

	u64 userd_iova;
	u64 userd_gpu_va;

	struct priv_cmd_queue priv_cmd_q;

	struct nvgpu_cond notifier_wq;
	struct nvgpu_cond semaphore_wq;

	/* kernel watchdog to kill stuck jobs */
	struct channel_gk20a_timeout timeout;

	/* for job cleanup handling in the background worker */
	struct nvgpu_list_node worker_item;

#if defined(CONFIG_GK20A_CYCLE_STATS)
	struct {
		void *cyclestate_buffer;
		u32 cyclestate_buffer_size;
		struct nvgpu_mutex cyclestate_buffer_mutex;
	} cyclestate;

	struct nvgpu_mutex cs_client_mutex;
	struct gk20a_cs_snapshot_client *cs_client;
#endif

	struct nvgpu_mutex dbg_s_lock;
	struct nvgpu_list_node dbg_s_list;

	struct nvgpu_mutex sync_lock;
	struct nvgpu_channel_sync *sync;
	struct nvgpu_channel_sync *user_sync;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	u64 virt_ctx;
#endif

	struct nvgpu_mem ctx_header;

	struct nvgpu_spinlock ch_timedout_lock;
	bool ch_timedout;

	/* Any operating system specific data. */
	void *os_priv;

	u32 obj_class;	/* we support only one obj per channel */

	u32 timeout_accumulated_ms;
	u32 timeout_gpfifo_get;

	u32 subctx_id;
	u32 runqueue_sel;

	u32 timeout_ms_max;
	u32 runlist_id;

	bool mmu_nack_handled;
	bool referenceable;
	bool vpr;
	bool deterministic;
	/* deterministic, but explicitly idle and submits disallowed */
	bool deterministic_railgate_allowed;
	bool cde;
	bool usermode_submit_enabled;
	bool timeout_debug_dump;
	bool has_os_fence_framework_support;

	bool is_privileged_channel;

	/**
	 * MMU Debugger Mode is enabled for this channel if refcnt > 0
	 */
	u32 mmu_debug_mode_refcnt;
};

static inline struct channel_gk20a *
channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
		((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
}

static inline struct channel_gk20a *
channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
		((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
}

static inline struct channel_gk20a *
channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
{
	return (struct channel_gk20a *)
		((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
}

static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
{
	return !!ch->vm;
}
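
/*
 * Example (illustrative sketch only): the helpers above recover the
 * enclosing channel from one of its embedded nvgpu_list_node members, i.e.
 * the usual container_of idiom. Given a node taken off the worker's list
 * (the list handling itself is outside the scope of this header):
 *
 *	struct nvgpu_list_node *node = ...;  // taken from the worker list
 *	struct channel_gk20a *ch = channel_gk20a_from_worker_item(node);
 *
 * The same pattern applies to free_chs and ch_entry via their respective
 * helpers.
 */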

int channel_gk20a_commit_va(struct channel_gk20a *c);
int gk20a_init_channel_support(struct gk20a *, u32 chid);

/* must be inside gk20a_busy()..gk20a_idle() */
void gk20a_channel_close(struct channel_gk20a *ch);
void __gk20a_channel_kill(struct channel_gk20a *ch);

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms, bool *progress);
void gk20a_disable_channel(struct channel_gk20a *ch);
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
		struct priv_cmd_entry *entry);
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);

int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);

int gk20a_channel_suspend(struct gk20a *g);
int gk20a_channel_resume(struct gk20a *g);
void gk20a_channel_deterministic_idle(struct gk20a *g);
void gk20a_channel_deterministic_unidle(struct gk20a *g);

int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);

struct channel_gk20a *gk20a_get_channel_from_file(int fd);
void gk20a_channel_update(struct channel_gk20a *c);

/* returns ch if reference was obtained */
struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
						      const char *caller);
#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)

void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)

/* returns NULL if could not take a ref to the channel */
struct channel_gk20a *__must_check _gk20a_channel_from_id(struct gk20a *g,
		u32 chid, const char *caller);
#define gk20a_channel_from_id(g, chid) _gk20a_channel_from_id(g, chid, __func__)

int gk20a_wait_channel_idle(struct channel_gk20a *ch);

/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
		s32 runlist_id,
		bool is_privileged_channel,
		pid_t pid, pid_t tid);

int nvgpu_channel_setup_bind(struct channel_gk20a *c,
		struct nvgpu_setup_bind_args *args);
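
/*
 * Example (illustrative sketch only): a minimal nvgpu_setup_bind_args for a
 * kernel-managed GPFIFO. The entry count chosen here is arbitrary, no
 * NVGPU_SETUP_BIND_FLAGS_* values are requested (they would be OR'd into
 * .flags), and the remaining fields (dmabuf fds/offsets, work_submit_token)
 * are simply left zeroed in this sketch. Error handling is omitted.
 *
 *	struct nvgpu_setup_bind_args args = {
 *		.num_gpfifo_entries = 1024,
 *		.num_inflight_jobs = 0,
 *		.flags = 0,
 *	};
 *
 *	err = nvgpu_channel_setup_bind(c, &args);
 */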
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);

bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);
void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);

int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);

int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
		unsigned int timeslice_period,
		unsigned int *__timeslice_timeout,
		unsigned int *__timeslice_scale);

void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
	struct nvgpu_cond *c, const char *caller, const char *counter_name);
int channel_gk20a_alloc_job(struct channel_gk20a *c,
		struct channel_gk20a_job **job_out);
void channel_gk20a_free_job(struct channel_gk20a *c,
		struct channel_gk20a_job *job);
u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
u32 nvgpu_gp_free_count(struct channel_gk20a *c);
int gk20a_channel_add_job(struct channel_gk20a *c,
		struct channel_gk20a_job *job,
		bool skip_buffer_refcounting);
void free_priv_cmdbuf(struct channel_gk20a *c,
		struct priv_cmd_entry *e);
void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
		bool clean_all);

void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);

u32 nvgpu_get_gpfifo_entry_size(void);

int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct gk20a_fence **fence_out,
		struct fifo_profile_gk20a *profile);

int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct gk20a_fence **fence_out);

#ifdef CONFIG_DEBUG_FS
void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
#else
static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
{
}
#endif

void gk20a_channel_set_timedout(struct channel_gk20a *ch);
bool gk20a_channel_check_timedout(struct channel_gk20a *ch);

#endif /* NVGPU_CHANNEL_H */
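
/*
 * End-to-end example (illustrative sketch only, error handling omitted):
 * how the declarations above fit together for a simple kernel-driven
 * channel lifecycle. The pid/tid values and the gpfifo contents are
 * placeholders; runlist_id -1 selects the GR runlist as noted above, and
 * channel close must happen inside gk20a_busy()..gk20a_idle() per the
 * comment on gk20a_channel_close().
 *
 *	struct nvgpu_setup_bind_args args = { .num_gpfifo_entries = 1024 };
 *	struct nvgpu_channel_fence fence_in = { 0 };
 *	struct gk20a_fence *fence_out = NULL;
 *	struct channel_gk20a *ch;
 *
 *	ch = gk20a_open_new_channel(g, -1, false, pid, tid);
 *	err = nvgpu_channel_setup_bind(ch, &args);
 *	err = nvgpu_submit_channel_gpfifo_kernel(ch, entries, num_entries,
 *			NVGPU_SUBMIT_FLAGS_FENCE_GET, &fence_in, &fence_out);
 *	// ... wait on and release the returned fence ...
 *	gk20a_channel_close(ch);
 */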