From 0c387d76dcc7e665255200ba8d98b9abb11cb4a1 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 21 Aug 2018 12:27:07 +0300 Subject: gpu: nvgpu: move channel code to common Do a simple rename of channel_gk20a.c to common/fifo/channel.c. Header cleanup and the like will soon follow. Also rename the os-specific files to have unique names across directories because tmake requires that. Jira NVGPU-967 Change-Id: I302bbbbe29735264e832378d444a176a4023e3e1 Signed-off-by: Konsta Holtta Reviewed-on: https://git-master.nvidia.com/r/1804608 Reviewed-by: svc-misra-checker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: Richard Zhao Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 4 +- drivers/gpu/nvgpu/Makefile.sources | 4 +- drivers/gpu/nvgpu/common/fifo/channel.c | 2262 ++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2262 ---------------------------- drivers/gpu/nvgpu/os/linux/channel.c | 508 ------- drivers/gpu/nvgpu/os/linux/linux-channel.c | 508 +++++++ drivers/gpu/nvgpu/os/posix/channel.c | 32 - drivers/gpu/nvgpu/os/posix/posix-channel.c | 32 + 8 files changed, 2806 insertions(+), 2806 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/fifo/channel.c delete mode 100644 drivers/gpu/nvgpu/gk20a/channel_gk20a.c delete mode 100644 drivers/gpu/nvgpu/os/linux/channel.c create mode 100644 drivers/gpu/nvgpu/os/linux/linux-channel.c delete mode 100644 drivers/gpu/nvgpu/os/posix/channel.c create mode 100644 drivers/gpu/nvgpu/os/posix/posix-channel.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index c7ccefb5..a6f0f0ce 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -74,7 +74,7 @@ nvgpu-y += \ os/linux/comptags.o \ os/linux/dmabuf.o \ os/linux/sched.o \ - os/linux/channel.o \ + os/linux/linux-channel.o \ os/linux/sim.o \ os/linux/sim_pci.o \ os/linux/os_sched.o \ @@ -200,13 +200,13 @@ nvgpu-y += \ common/clock_gating/gv11b_gating_reglist.o \ common/sim.o \ common/sim_pci.o \ + common/fifo/channel.o \ common/fifo/submit.o \ common/ecc.o \ common/ce2.o \ gk20a/gk20a.o \ gk20a/ce2_gk20a.o \ gk20a/fifo_gk20a.o \ - gk20a/channel_gk20a.o \ gk20a/channel_sync_gk20a.o \ gk20a/dbg_gpu_gk20a.o \ gk20a/regops_gk20a.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 503e0f3a..f1ba7f76 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -39,7 +39,7 @@ srcs := os/posix/nvgpu.c \ os/posix/error_notifier.c \ os/posix/fuse.c \ os/posix/clk_arb.c \ - os/posix/channel.c \ + os/posix/posix-channel.c \ os/posix/tsg.c \ os/posix/nvlink.c \ os/posix/lock.c \ @@ -104,6 +104,7 @@ srcs := os/posix/nvgpu.c \ common/clock_gating/gv11b_gating_reglist.c \ common/clock_gating/gp106_gating_reglist.c \ common/clock_gating/gv100_gating_reglist.c \ + common/fifo/channel.c \ common/fifo/submit.c \ boardobj/boardobj.c \ boardobj/boardobjgrp.c \ @@ -140,7 +141,6 @@ srcs := os/posix/nvgpu.c \ common/ptimer/ptimer_gk20a.c \ gk20a/ce2_gk20a.c \ gk20a/fifo_gk20a.c \ - gk20a/channel_gk20a.c \ gk20a/channel_sync_gk20a.c \ gk20a/dbg_gpu_gk20a.c \ gk20a/regops_gk20a.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c new file mode 100644 index 00000000..5966e191 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -0,0 +1,2262 @@ +/* + * GK20A Graphics channel + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/fence_gk20a.h" + +static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); +static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c); + +static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); +static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); + +static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); + +static void channel_gk20a_joblist_add(struct channel_gk20a *c, + struct channel_gk20a_job *job); +static void channel_gk20a_joblist_delete(struct channel_gk20a *c, + struct channel_gk20a_job *job); +static struct channel_gk20a_job *channel_gk20a_joblist_peek( + struct channel_gk20a *c); + +/* allocate GPU channel */ +static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) +{ + struct channel_gk20a *ch = NULL; + struct gk20a *g = f->g; + + nvgpu_mutex_acquire(&f->free_chs_mutex); + if (!nvgpu_list_empty(&f->free_chs)) { + ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, + free_chs); + nvgpu_list_del(&ch->free_chs); + WARN_ON(nvgpu_atomic_read(&ch->ref_count)); + WARN_ON(ch->referenceable); + f->used_channels++; + } + nvgpu_mutex_release(&f->free_chs_mutex); + + if (g->aggressive_sync_destroy_thresh && + (f->used_channels > + g->aggressive_sync_destroy_thresh)) + g->aggressive_sync_destroy = true; + + return ch; +} + +static void free_channel(struct fifo_gk20a *f, + struct channel_gk20a *ch) +{ + struct gk20a *g = f->g; + + trace_gk20a_release_used_channel(ch->chid); + /* refcount is zero here and channel is in a freed/dead state */ + nvgpu_mutex_acquire(&f->free_chs_mutex); + /* add to head to increase visibility of timing-related bugs */ + nvgpu_list_add(&ch->free_chs, &f->free_chs); + f->used_channels--; + nvgpu_mutex_release(&f->free_chs_mutex); + + /* + * On teardown it is not possible to dereference platform, but ignoring + * this is fine then because no new channels would be created. 
+ */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + if (g->aggressive_sync_destroy_thresh && + (f->used_channels < + g->aggressive_sync_destroy_thresh)) + g->aggressive_sync_destroy = false; + } +} + +int channel_gk20a_commit_va(struct channel_gk20a *c) +{ + struct gk20a *g = c->g; + + nvgpu_log_fn(g, " "); + + g->ops.mm.init_inst_block(&c->inst_block, c->vm, + c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]); + + return 0; +} + +int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, + unsigned int timeslice_period, + unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale) +{ + unsigned int value = scale_ptimer(timeslice_period, + ptimer_scalingfactor10x(g->ptimer_src_freq)); + unsigned int shift = 0; + + /* value field is 8 bits long */ + while (value >= 1 << 8) { + value >>= 1; + shift++; + } + + /* time slice register is only 18bits long */ + if ((value << shift) >= 1<<19) { + nvgpu_err(g, "Requested timeslice value is clamped to 18 bits\n"); + value = 255; + shift = 10; + } + + *__timeslice_timeout = value; + *__timeslice_scale = shift; + + return 0; +} + +int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) +{ + return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true); +} + +int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) +{ + struct tsg_gk20a *tsg; + + if (gk20a_is_channel_marked_as_tsg(ch)) { + tsg = &g->fifo.tsg[ch->tsgid]; + g->ops.fifo.enable_tsg(tsg); + } else { + g->ops.fifo.enable_channel(ch); + } + + return 0; +} + +int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) +{ + struct tsg_gk20a *tsg; + + if (gk20a_is_channel_marked_as_tsg(ch)) { + tsg = &g->fifo.tsg[ch->tsgid]; + g->ops.fifo.disable_tsg(tsg); + } else { + g->ops.fifo.disable_channel(ch); + } + + return 0; +} + +void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) +{ + /* synchronize with actual job cleanup */ + nvgpu_mutex_acquire(&ch->joblist.cleanup_lock); + + /* ensure no fences are pending */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->sync) + ch->sync->set_min_eq_max(ch->sync); + if (ch->user_sync) + ch->user_sync->set_safe_state(ch->user_sync); + nvgpu_mutex_release(&ch->sync_lock); + + nvgpu_mutex_release(&ch->joblist.cleanup_lock); + + /* + * When closing the channel, this scheduled update holds one ref which + * is waited for before advancing with freeing. 
+ */ + gk20a_channel_update(ch); +} + +void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) +{ + nvgpu_log_fn(ch->g, " "); + + if (gk20a_is_channel_marked_as_tsg(ch)) + return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt); + + /* make sure new kickoffs are prevented */ + ch->has_timedout = true; + + ch->g->ops.fifo.disable_channel(ch); + + if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch)) + ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); + + if (ch->g->ops.fifo.ch_abort_clean_up) + ch->g->ops.fifo.ch_abort_clean_up(ch); +} + +int gk20a_wait_channel_idle(struct channel_gk20a *ch) +{ + bool channel_idle = false; + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g), + NVGPU_TIMER_CPU_TIMER); + + do { + channel_gk20a_joblist_lock(ch); + channel_idle = channel_gk20a_joblist_is_empty(ch); + channel_gk20a_joblist_unlock(ch); + if (channel_idle) + break; + + nvgpu_usleep_range(1000, 3000); + } while (!nvgpu_timeout_expired(&timeout)); + + if (!channel_idle) { + nvgpu_err(ch->g, "jobs not freed for channel %d", + ch->chid); + return -EBUSY; + } + + return 0; +} + +void gk20a_disable_channel(struct channel_gk20a *ch) +{ + gk20a_channel_abort(ch, true); + channel_gk20a_update_runlist(ch, false); +} + +void gk20a_wait_until_counter_is_N( + struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, + struct nvgpu_cond *c, const char *caller, const char *counter_name) +{ + while (true) { + if (NVGPU_COND_WAIT( + c, + nvgpu_atomic_read(counter) == wait_value, + 5000) == 0) + break; + + nvgpu_warn(ch->g, + "%s: channel %d, still waiting, %s left: %d, waiting for: %d", + caller, ch->chid, counter_name, + nvgpu_atomic_read(counter), wait_value); + + gk20a_channel_dump_ref_actions(ch); + } +} + +/* call ONLY when no references to the channel exist: after the last put */ +static void gk20a_free_channel(struct channel_gk20a *ch, bool force) +{ + struct gk20a *g = ch->g; + struct fifo_gk20a *f = &g->fifo; + struct gr_gk20a *gr = &g->gr; + struct vm_gk20a *ch_vm = ch->vm; + unsigned long timeout = gk20a_get_gr_idle_timeout(g); + struct dbg_session_gk20a *dbg_s; + struct dbg_session_data *session_data, *tmp_s; + struct dbg_session_channel_data *ch_data, *tmp; + int err; + + nvgpu_log_fn(g, " "); + + WARN_ON(ch->g == NULL); + + trace_gk20a_free_channel(ch->chid); + + if (g->os_channel.close) + g->os_channel.close(ch); + + /* + * Disable channel/TSG and unbind here. 
This should not be executed if + * HW access is not available during shutdown/removal path as it will + * trigger a timeout + */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* abort channel and remove from runlist */ + if (gk20a_is_channel_marked_as_tsg(ch)) { + err = gk20a_tsg_unbind_channel(ch); + if (err) + nvgpu_err(g, + "failed to unbind channel %d from TSG", + ch->chid); + } else { + /* + * Channel is already unbound from TSG by User with + * explicit call + * Nothing to do here in that case + */ + } + } + /* wait until there's only our ref to the channel */ + if (!force) + gk20a_wait_until_counter_is_N( + ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, + __func__, "references"); + + /* wait until all pending interrupts for recently completed + * jobs are handled */ + nvgpu_wait_for_deferred_interrupts(g); + + /* prevent new refs */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + if (!ch->referenceable) { + nvgpu_spinlock_release(&ch->ref_obtain_lock); + nvgpu_err(ch->g, + "Extra %s() called to channel %u", + __func__, ch->chid); + return; + } + ch->referenceable = false; + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + /* matches with the initial reference in gk20a_open_new_channel() */ + nvgpu_atomic_dec(&ch->ref_count); + + /* wait until no more refs to the channel */ + if (!force) + gk20a_wait_until_counter_is_N( + ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, + __func__, "references"); + + /* if engine reset was deferred, perform it now */ + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + if (g->fifo.deferred_reset_pending) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" + " deferred, running now"); + /* if lock is already taken, a reset is taking place + so no need to repeat */ + if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { + gk20a_fifo_deferred_reset(g, ch); + nvgpu_mutex_release(&g->fifo.gr_reset_mutex); + } + } + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (!gk20a_channel_as_bound(ch)) + goto unbind; + + nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", + timeout); + +#ifdef CONFIG_GK20A_CTXSW_TRACE + if (g->ops.fecs_trace.unbind_channel && !ch->vpr) + g->ops.fecs_trace.unbind_channel(g, ch); +#endif + + if(g->ops.fifo.free_channel_ctx_header) + g->ops.fifo.free_channel_ctx_header(ch); + + if (ch->usermode_submit_enabled) { + gk20a_channel_free_usermode_buffers(ch); + ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) + + ch->chid * f->userd_entry_size; + ch->usermode_submit_enabled = false; + } + + gk20a_gr_flush_channel_tlb(gr); + + nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem); + nvgpu_big_free(g, ch->gpfifo.pipe); + memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); + + channel_gk20a_free_priv_cmdbuf(ch); + + /* sync must be destroyed before releasing channel vm */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->sync) { + gk20a_channel_sync_destroy(ch->sync, false); + ch->sync = NULL; + } + if (ch->user_sync) { + /* + * Set user managed syncpoint to safe state + * But it's already done if channel has timedout + */ + if (ch->has_timedout) + gk20a_channel_sync_destroy(ch->user_sync, false); + else + gk20a_channel_sync_destroy(ch->user_sync, true); + ch->user_sync = NULL; + } + nvgpu_mutex_release(&ch->sync_lock); + + /* + * free the channel used semaphore index. + * we need to do this before releasing the address space, + * as the semaphore pool might get freed after that point. 
+ */ + if (ch->hw_sema) + nvgpu_semaphore_free_hw_sema(ch); + + /* + * When releasing the channel we unbind the VM - so release the ref. + */ + nvgpu_vm_put(ch_vm); + + /* make sure we don't have deferred interrupts pending that + * could still touch the channel */ + nvgpu_wait_for_deferred_interrupts(g); + +unbind: + g->ops.fifo.unbind_channel(ch); + g->ops.fifo.free_inst(g, ch); + + /* put back the channel-wide submit ref from init */ + if (ch->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + ch->deterministic = false; + if (!ch->deterministic_railgate_allowed) + gk20a_idle(g); + ch->deterministic_railgate_allowed = false; + + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + ch->vpr = false; + ch->vm = NULL; + + WARN_ON(ch->sync); + + /* unlink all debug sessions */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_list_for_each_entry_safe(session_data, tmp_s, + &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) { + if (ch_data->chid == ch->chid) + ch_data->unbind_single_channel(dbg_s, ch_data); + } + nvgpu_mutex_release(&dbg_s->ch_list_lock); + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + /* free pre-allocated resources, if applicable */ + if (channel_gk20a_is_prealloc_enabled(ch)) + channel_gk20a_free_prealloc_resources(ch); + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); + ch->ref_actions_put = 0; +#endif + + /* make sure we catch accesses of unopened channels in case + * there's non-refcounted channel pointers hanging around */ + ch->g = NULL; + nvgpu_smp_wmb(); + + /* ALWAYS last */ + free_channel(f, ch); +} + +static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + size_t i, get; + s64 now = nvgpu_current_time_ms(); + s64 prev = 0; + struct gk20a *g = ch->g; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", + ch->chid, nvgpu_atomic_read(&ch->ref_count)); + + /* start at the oldest possible entry. put is next insertion point */ + get = ch->ref_actions_put; + + /* + * If the buffer is not full, this will first loop to the oldest entry, + * skipping not-yet-initialized entries. There is no ref_actions_get. + */ + for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { + struct channel_gk20a_ref_action *act = &ch->ref_actions[get]; + + if (act->trace.nr_entries) { + nvgpu_info(g, + "%s ref %zu steps ago (age %lld ms, diff %lld ms)", + act->type == channel_gk20a_ref_action_get + ? 
"GET" : "PUT", + GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, + now - act->timestamp_ms, + act->timestamp_ms - prev); + + print_stack_trace(&act->trace, 0); + prev = act->timestamp_ms; + } + + get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; + } + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, + enum channel_gk20a_ref_action_type type) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + struct channel_gk20a_ref_action *act; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + act = &ch->ref_actions[ch->ref_actions_put]; + act->type = type; + act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; + act->trace.nr_entries = 0; + act->trace.skip = 3; /* onwards from the caller of this */ + act->trace.entries = act->trace_entries; + save_stack_trace(&act->trace); + act->timestamp_ms = nvgpu_current_time_ms(); + ch->ref_actions_put = (ch->ref_actions_put + 1) % + GK20A_CHANNEL_REFCOUNT_TRACKING; + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +/* Try to get a reference to the channel. Return nonzero on success. If fails, + * the channel is dead or being freed elsewhere and you must not touch it. + * + * Always when a channel_gk20a pointer is seen and about to be used, a + * reference must be held to it - either by you or the caller, which should be + * documented well or otherwise clearly seen. This usually boils down to the + * file from ioctls directly, or an explicit get in exception handlers when the + * channel is found by a chid. + * + * Most global functions in this file require a reference to be held by the + * caller. + */ +struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, + const char *caller) { + struct channel_gk20a *ret; + + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + + if (likely(ch->referenceable)) { + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); + nvgpu_atomic_inc(&ch->ref_count); + ret = ch; + } else + ret = NULL; + + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + if (ret) + trace_gk20a_channel_get(ch->chid, caller); + + return ret; +} + +void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) +{ + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); + trace_gk20a_channel_put(ch->chid, caller); + nvgpu_atomic_dec(&ch->ref_count); + nvgpu_cond_broadcast(&ch->ref_count_dec_wq); + + /* More puts than gets. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); + + /* Also, more puts than gets. ref_count can go to 0 only if + * the channel is closing. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); +} + +void gk20a_channel_close(struct channel_gk20a *ch) +{ + gk20a_free_channel(ch, false); +} + +/* + * Be careful with this - it is meant for terminating channels when we know the + * driver is otherwise dying. Ref counts and the like are ignored by this + * version of the cleanup. 
+ */ +void __gk20a_channel_kill(struct channel_gk20a *ch) +{ + gk20a_free_channel(ch, true); +} + +struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, + s32 runlist_id, + bool is_privileged_channel, + pid_t pid, pid_t tid) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch; + + /* compatibility with existing code */ + if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) { + runlist_id = gk20a_fifo_get_gr_runlist_id(g); + } + + nvgpu_log_fn(g, " "); + + ch = allocate_channel(f); + if (ch == NULL) { + /* TBD: we want to make this virtualizable */ + nvgpu_err(g, "out of hw chids"); + return NULL; + } + + trace_gk20a_open_new_channel(ch->chid); + + BUG_ON(ch->g); + ch->g = g; + + /* Runlist for the channel */ + ch->runlist_id = runlist_id; + + /* Channel privilege level */ + ch->is_privileged_channel = is_privileged_channel; + + ch->pid = tid; + ch->tgid = pid; /* process granularity for FECS traces */ + + if (g->ops.fifo.alloc_inst(g, ch)) { + ch->g = NULL; + free_channel(f, ch); + nvgpu_err(g, + "failed to open gk20a channel, out of inst mem"); + return NULL; + } + + /* now the channel is in a limbo out of the free list but not marked as + * alive and used (i.e. get-able) yet */ + + /* By default, channel is regular (non-TSG) channel */ + ch->tsgid = NVGPU_INVALID_TSG_ID; + + /* clear ctxsw timeout counter and update timestamp */ + ch->timeout_accumulated_ms = 0; + ch->timeout_gpfifo_get = 0; + /* set gr host default timeout */ + ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); + ch->timeout_debug_dump = true; + ch->has_timedout = false; + + /* init kernel watchdog timeout */ + ch->timeout.enabled = true; + ch->timeout.limit_ms = g->ch_wdt_timeout_ms; + ch->timeout.debug_dump = true; + + ch->obj_class = 0; + ch->subctx_id = 0; + ch->runqueue_sel = 0; + + ch->mmu_nack_handled = false; + + /* The channel is *not* runnable at this point. It still needs to have + * an address space bound and allocate a gpfifo and grctx. */ + + nvgpu_cond_init(&ch->notifier_wq); + nvgpu_cond_init(&ch->semaphore_wq); + + if (g->os_channel.open) + g->os_channel.open(ch); + + /* Mark the channel alive, get-able, with 1 initial use + * references. The initial reference will be decreased in + * gk20a_free_channel() */ + ch->referenceable = true; + nvgpu_atomic_set(&ch->ref_count, 1); + nvgpu_smp_wmb(); + + return ch; +} + +/* allocate private cmd buffer. + used for inserting commands before/after user submitted buffers. */ +static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) +{ + struct gk20a *g = c->g; + struct vm_gk20a *ch_vm = c->vm; + struct priv_cmd_queue *q = &c->priv_cmd_q; + u32 size; + int err = 0; + + /* + * Compute the amount of priv_cmdbuf space we need. In general the worst + * case is the kernel inserts both a semaphore pre-fence and post-fence. + * Any sync-pt fences will take less memory so we can ignore them for + * now. + * + * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b, + * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10 + * dwords: all the same as an ACQ plus a non-stalling intr which is + * another 2 dwords. + * + * Lastly the number of gpfifo entries per channel is fixed so at most + * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one + * userspace entry, and one post-fence entry). Thus the computation is: + * + * (gpfifo entry number * (2 / 3) * (8 + 10) * 4 bytes. 
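+ *
+ * Worked example (1024 is just an illustrative entry count): a channel
+ * with 1024 gpfifo entries needs 1024 * 2 * 18 * 4 / 3 = 49152 bytes
+ * (48 KiB), which the roundup_pow_of_two() below turns into 64 KiB.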
+ */ + size = roundup_pow_of_two(c->gpfifo.entry_num * + 2 * 18 * sizeof(u32) / 3); + + err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); + if (err) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + goto clean_up; + } + + q->size = q->mem.size / sizeof (u32); + + return 0; + +clean_up: + channel_gk20a_free_priv_cmdbuf(c); + return err; +} + +static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) +{ + struct vm_gk20a *ch_vm = c->vm; + struct priv_cmd_queue *q = &c->priv_cmd_q; + + if (q->size == 0) + return; + + nvgpu_dma_unmap_free(ch_vm, &q->mem); + + memset(q, 0, sizeof(struct priv_cmd_queue)); +} + +/* allocate a cmd buffer with given size. size is number of u32 entries */ +int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, + struct priv_cmd_entry *e) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + u32 free_count; + u32 size = orig_size; + + nvgpu_log_fn(c->g, "size %d", orig_size); + + if (!e) { + nvgpu_err(c->g, + "ch %d: priv cmd entry is null", + c->chid); + return -EINVAL; + } + + /* if free space in the end is less than requested, increase the size + * to make the real allocated space start from beginning. */ + if (q->put + size > q->size) + size = orig_size + (q->size - q->put); + + nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", + c->chid, q->get, q->put); + + free_count = (q->size - (q->put - q->get) - 1) % q->size; + + if (size > free_count) + return -EAGAIN; + + e->size = orig_size; + e->mem = &q->mem; + + /* if we have increased size to skip free space in the end, set put + to beginning of cmd buffer (0) + size */ + if (size != orig_size) { + e->off = 0; + e->gva = q->mem.gpu_va; + q->put = orig_size; + } else { + e->off = q->put; + e->gva = q->mem.gpu_va + q->put * sizeof(u32); + q->put = (q->put + orig_size) & (q->size - 1); + } + + /* we already handled q->put + size > q->size so BUG_ON this */ + BUG_ON(q->put > q->size); + + /* + * commit the previous writes before making the entry valid. + * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). + */ + nvgpu_smp_wmb(); + + e->valid = true; + nvgpu_log_fn(c->g, "done"); + + return 0; +} + +/* Don't call this to free an explict cmd entry. + * It doesn't update priv_cmd_queue get/put */ +void free_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *e) +{ + if (channel_gk20a_is_prealloc_enabled(c)) + memset(e, 0, sizeof(struct priv_cmd_entry)); + else + nvgpu_kfree(c->g, e); +} + +int channel_gk20a_alloc_job(struct channel_gk20a *c, + struct channel_gk20a_job **job_out) +{ + int err = 0; + + if (channel_gk20a_is_prealloc_enabled(c)) { + int put = c->joblist.pre_alloc.put; + int get = c->joblist.pre_alloc.get; + + /* + * ensure all subsequent reads happen after reading get. + * see corresponding nvgpu_smp_wmb in + * gk20a_channel_clean_up_jobs() + */ + nvgpu_smp_rmb(); + + if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) + *job_out = &c->joblist.pre_alloc.jobs[put]; + else { + nvgpu_warn(c->g, + "out of job ringbuffer space"); + err = -EAGAIN; + } + } else { + *job_out = nvgpu_kzalloc(c->g, + sizeof(struct channel_gk20a_job)); + if (!*job_out) + err = -ENOMEM; + } + + return err; +} + +void channel_gk20a_free_job(struct channel_gk20a *c, + struct channel_gk20a_job *job) +{ + /* + * In case of pre_allocated jobs, we need to clean out + * the job but maintain the pointers to the priv_cmd_entry, + * since they're inherently tied to the job node. 
+ */ + if (channel_gk20a_is_prealloc_enabled(c)) { + struct priv_cmd_entry *wait_cmd = job->wait_cmd; + struct priv_cmd_entry *incr_cmd = job->incr_cmd; + memset(job, 0, sizeof(*job)); + job->wait_cmd = wait_cmd; + job->incr_cmd = incr_cmd; + } else + nvgpu_kfree(c->g, job); +} + +void channel_gk20a_joblist_lock(struct channel_gk20a *c) +{ + if (channel_gk20a_is_prealloc_enabled(c)) + nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); + else + nvgpu_spinlock_acquire(&c->joblist.dynamic.lock); +} + +void channel_gk20a_joblist_unlock(struct channel_gk20a *c) +{ + if (channel_gk20a_is_prealloc_enabled(c)) + nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); + else + nvgpu_spinlock_release(&c->joblist.dynamic.lock); +} + +static struct channel_gk20a_job *channel_gk20a_joblist_peek( + struct channel_gk20a *c) +{ + int get; + struct channel_gk20a_job *job = NULL; + + if (channel_gk20a_is_prealloc_enabled(c)) { + if (!channel_gk20a_joblist_is_empty(c)) { + get = c->joblist.pre_alloc.get; + job = &c->joblist.pre_alloc.jobs[get]; + } + } else { + if (!nvgpu_list_empty(&c->joblist.dynamic.jobs)) + job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs, + channel_gk20a_job, list); + } + + return job; +} + +static void channel_gk20a_joblist_add(struct channel_gk20a *c, + struct channel_gk20a_job *job) +{ + if (channel_gk20a_is_prealloc_enabled(c)) { + c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % + (c->joblist.pre_alloc.length); + } else { + nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs); + } +} + +static void channel_gk20a_joblist_delete(struct channel_gk20a *c, + struct channel_gk20a_job *job) +{ + if (channel_gk20a_is_prealloc_enabled(c)) { + c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % + (c->joblist.pre_alloc.length); + } else { + nvgpu_list_del(&job->list); + } +} + +bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) +{ + if (channel_gk20a_is_prealloc_enabled(c)) { + int get = c->joblist.pre_alloc.get; + int put = c->joblist.pre_alloc.put; + return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); + } + + return nvgpu_list_empty(&c->joblist.dynamic.jobs); +} + +bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) +{ + bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; + + nvgpu_smp_rmb(); + return pre_alloc_enabled; +} + +static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, + unsigned int num_jobs) +{ + unsigned int i; + int err; + size_t size; + struct priv_cmd_entry *entries = NULL; + + if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) + return -EINVAL; + + /* + * pre-allocate the job list. + * since vmalloc take in an unsigned long, we need + * to make sure we don't hit an overflow condition + */ + size = sizeof(struct channel_gk20a_job); + if (num_jobs <= ULONG_MAX / size) + c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g, + num_jobs * size); + if (!c->joblist.pre_alloc.jobs) { + err = -ENOMEM; + goto clean_up; + } + + /* + * pre-allocate 2x priv_cmd_entry for each job up front. 
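+ * (i.e. one wait_cmd and one incr_cmd per job; the loop further down
+ * wires jobs[i] to entries[i] and entries[i + num_jobs]).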
+ * since vmalloc take in an unsigned long, we need + * to make sure we don't hit an overflow condition + */ + size = sizeof(struct priv_cmd_entry); + if (num_jobs <= ULONG_MAX / (size << 1)) + entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size); + if (!entries) { + err = -ENOMEM; + goto clean_up_joblist; + } + + for (i = 0; i < num_jobs; i++) { + c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; + c->joblist.pre_alloc.jobs[i].incr_cmd = + &entries[i + num_jobs]; + } + + /* pre-allocate a fence pool */ + err = gk20a_alloc_fence_pool(c, num_jobs); + if (err) + goto clean_up_priv_cmd; + + c->joblist.pre_alloc.length = num_jobs; + c->joblist.pre_alloc.put = 0; + c->joblist.pre_alloc.get = 0; + + /* + * commit the previous writes before setting the flag. + * see corresponding nvgpu_smp_rmb in + * channel_gk20a_is_prealloc_enabled() + */ + nvgpu_smp_wmb(); + c->joblist.pre_alloc.enabled = true; + + return 0; + +clean_up_priv_cmd: + nvgpu_vfree(c->g, entries); +clean_up_joblist: + nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); +clean_up: + memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); + return err; +} + +static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) +{ + nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd); + nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); + gk20a_free_fence_pool(c); + + /* + * commit the previous writes before disabling the flag. + * see corresponding nvgpu_smp_rmb in + * channel_gk20a_is_prealloc_enabled() + */ + nvgpu_smp_wmb(); + c->joblist.pre_alloc.enabled = false; +} + +int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, + struct nvgpu_gpfifo_args *gpfifo_args) +{ + struct gk20a *g = c->g; + struct vm_gk20a *ch_vm; + u32 gpfifo_size, gpfifo_entry_size; + int err = 0; + unsigned long acquire_timeout; + + gpfifo_size = gpfifo_args->num_entries; + gpfifo_entry_size = nvgpu_get_gpfifo_entry_size(); + + if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR) + c->vpr = true; + + if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + /* + * Railgating isn't deterministic; instead of disallowing + * railgating globally, take a power refcount for this + * channel's lifetime. The gk20a_idle() pair for this happens + * when the channel gets freed. + * + * Deterministic flag and this busy must be atomic within the + * busy lock. + */ + err = gk20a_busy(g); + if (err) { + nvgpu_rwsem_up_read(&g->deterministic_busy); + return err; + } + + c->deterministic = true; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + /* an address space needs to have been bound at this point. 
*/ + if (!gk20a_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of gpfifo" + " allocation."); + err = -EINVAL; + goto clean_up_idle; + } + ch_vm = c->vm; + + if (c->gpfifo.mem.size) { + nvgpu_err(g, "channel %d :" + "gpfifo already allocated", c->chid); + err = -EEXIST; + goto clean_up_idle; + } + + if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) { + if (g->ops.fifo.alloc_usermode_buffers) { + err = g->ops.fifo.alloc_usermode_buffers(c, + gpfifo_args); + if (err) { + nvgpu_err(g, "Usermode buffer alloc failed"); + goto clean_up; + } + c->userd_iova = nvgpu_mem_get_addr(g, + &c->usermode_userd); + c->usermode_submit_enabled = true; + } else { + nvgpu_err(g, "Usermode submit not supported"); + err = -EINVAL; + goto clean_up; + } + } + + err = nvgpu_dma_alloc_map_sys(ch_vm, + gpfifo_size * gpfifo_entry_size, + &c->gpfifo.mem); + if (err) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + goto clean_up_usermode; + } + + if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) { + c->gpfifo.pipe = nvgpu_big_malloc(g, + gpfifo_size * gpfifo_entry_size); + if (!c->gpfifo.pipe) { + err = -ENOMEM; + goto clean_up_unmap; + } + } + + c->gpfifo.entry_num = gpfifo_size; + c->gpfifo.get = c->gpfifo.put = 0; + + nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", + c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); + + g->ops.fifo.setup_userd(c); + + if (!g->aggressive_sync_destroy_thresh) { + nvgpu_mutex_acquire(&c->sync_lock); + c->sync = gk20a_channel_sync_create(c, false); + if (!c->sync) { + err = -ENOMEM; + nvgpu_mutex_release(&c->sync_lock); + goto clean_up_unmap; + } + nvgpu_mutex_release(&c->sync_lock); + + if (g->ops.fifo.resetup_ramfc) { + err = g->ops.fifo.resetup_ramfc(c); + if (err) + goto clean_up_sync; + } + } + + if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled) + acquire_timeout = 0; + else + acquire_timeout = c->timeout.limit_ms; + + err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, + c->gpfifo.entry_num, + acquire_timeout, gpfifo_args->flags); + if (err) + goto clean_up_sync; + + /* TBD: setup engine contexts */ + + if (gpfifo_args->num_inflight_jobs) { + err = channel_gk20a_prealloc_resources(c, + gpfifo_args->num_inflight_jobs); + if (err) + goto clean_up_sync; + } + + err = channel_gk20a_alloc_priv_cmdbuf(c); + if (err) + goto clean_up_prealloc; + + err = channel_gk20a_update_runlist(c, true); + if (err) + goto clean_up_priv_cmd; + + g->ops.fifo.bind_channel(c); + + nvgpu_log_fn(g, "done"); + return 0; + +clean_up_priv_cmd: + channel_gk20a_free_priv_cmdbuf(c); +clean_up_prealloc: + if (gpfifo_args->num_inflight_jobs) + channel_gk20a_free_prealloc_resources(c); +clean_up_sync: + if (c->sync) { + gk20a_channel_sync_destroy(c->sync, false); + c->sync = NULL; + } +clean_up_unmap: + nvgpu_big_free(g, c->gpfifo.pipe); + nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); +clean_up_usermode: + if (c->usermode_submit_enabled) { + gk20a_channel_free_usermode_buffers(c); + c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) + + c->chid * g->fifo.userd_entry_size; + c->usermode_submit_enabled = false; + } +clean_up: + memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); +clean_up_idle: + if (c->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + gk20a_idle(g); + c->deterministic = false; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + nvgpu_err(g, "fail"); + return err; +} + +void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c) +{ + if (nvgpu_mem_is_valid(&c->usermode_userd)) + 
nvgpu_dma_free(c->g, &c->usermode_userd); +} + +/* Update with this periodically to determine how the gpfifo is draining. */ +static inline u32 update_gp_get(struct gk20a *g, + struct channel_gk20a *c) +{ + u32 new_get = g->ops.fifo.userd_gp_get(g, c); + + if (new_get < c->gpfifo.get) + c->gpfifo.wrap = !c->gpfifo.wrap; + c->gpfifo.get = new_get; + return new_get; +} + +u32 nvgpu_gp_free_count(struct channel_gk20a *c) +{ + return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % + c->gpfifo.entry_num; +} + +bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, + u32 timeout_delta_ms, bool *progress) +{ + u32 gpfifo_get = update_gp_get(ch->g, ch); + + /* Count consequent timeout isr */ + if (gpfifo_get == ch->timeout_gpfifo_get) { + /* we didn't advance since previous channel timeout check */ + ch->timeout_accumulated_ms += timeout_delta_ms; + *progress = false; + } else { + /* first timeout isr encountered */ + ch->timeout_accumulated_ms = timeout_delta_ms; + *progress = true; + } + + ch->timeout_gpfifo_get = gpfifo_get; + + return nvgpu_is_timeouts_enabled(ch->g) && + ch->timeout_accumulated_ms > ch->timeout_ms_max; +} + +u32 nvgpu_get_gp_free_count(struct channel_gk20a *c) +{ + update_gp_get(c->g, c); + return nvgpu_gp_free_count(c); +} + +static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) +{ + ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch); + ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch); + ch->timeout.running = true; + nvgpu_timeout_init(ch->g, &ch->timeout.timer, + ch->timeout.limit_ms, + NVGPU_TIMER_CPU_TIMER); +} + +/** + * Start a timeout counter (watchdog) on this channel. + * + * Trigger a watchdog to recover the channel after the per-platform timeout + * duration (but strictly no earlier) if the channel hasn't advanced within + * that time. + * + * If the timeout is already running, do nothing. This should be called when + * new jobs are submitted. The timeout will stop when the last tracked job + * finishes, making the channel idle. + * + * The channel's gpfifo read pointer will be used to determine if the job has + * actually stuck at that time. After the timeout duration has expired, a + * worker thread will consider the channel stuck and recover it if stuck. + */ +static void gk20a_channel_timeout_start(struct channel_gk20a *ch) +{ + if (!nvgpu_is_timeouts_enabled(ch->g)) + return; + + if (!ch->timeout.enabled) + return; + + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + + if (ch->timeout.running) { + nvgpu_raw_spinlock_release(&ch->timeout.lock); + return; + } + __gk20a_channel_timeout_start(ch); + nvgpu_raw_spinlock_release(&ch->timeout.lock); +} + +/** + * Stop a running timeout counter (watchdog) on this channel. + * + * Make the watchdog consider the channel not running, so that it won't get + * recovered even if no progress is detected. Progress is not tracked if the + * watchdog is turned off. + * + * No guarantees are made about concurrent execution of the timeout handler. + * (This should be called from an update handler running in the same thread + * with the watchdog.) + */ +static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch) +{ + bool was_running; + + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + was_running = ch->timeout.running; + ch->timeout.running = false; + nvgpu_raw_spinlock_release(&ch->timeout.lock); + return was_running; +} + +/** + * Continue a previously stopped timeout + * + * Enable the timeout again but don't reinitialize its timer. 
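+ * The job cleanup path uses this when the oldest tracked job has not
+ * completed yet, so the deadline set at submit time keeps counting.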
+ * + * No guarantees are made about concurrent execution of the timeout handler. + * (This should be called from an update handler running in the same thread + * with the watchdog.) + */ +static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) +{ + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + ch->timeout.running = true; + nvgpu_raw_spinlock_release(&ch->timeout.lock); +} + +/** + * Rewind the timeout on each non-dormant channel. + * + * Reschedule the timeout of each active channel for which timeouts are running + * as if something was happened on each channel right now. This should be + * called when a global hang is detected that could cause a false positive on + * other innocent channels. + */ +void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + if (!gk20a_channel_get(ch)) + continue; + + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + if (ch->timeout.running) + __gk20a_channel_timeout_start(ch); + nvgpu_raw_spinlock_release(&ch->timeout.lock); + + gk20a_channel_put(ch); + } +} + +/** + * Check if a timed out channel has hung and recover it if it has. + * + * Test if this channel has really got stuck at this point by checking if its + * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since + * when the watchdog was started and it's timed out, force-reset the channel. + * + * The gpu is implicitly on at this point, because the watchdog can only run on + * channels that have submitted jobs pending for cleanup. + */ +static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) +{ + struct gk20a *g = ch->g; + u32 gp_get; + u32 new_gp_get; + u64 pb_get; + u64 new_pb_get; + + nvgpu_log_fn(g, " "); + + /* Get status but keep timer running */ + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + gp_get = ch->timeout.gp_get; + pb_get = ch->timeout.pb_get; + nvgpu_raw_spinlock_release(&ch->timeout.lock); + + new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch); + new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); + + if (new_gp_get != gp_get || new_pb_get != pb_get) { + /* Channel has advanced, rewind timer */ + gk20a_channel_timeout_stop(ch); + gk20a_channel_timeout_start(ch); + return; + } + + if (!nvgpu_timeout_peek_expired(&ch->timeout.timer)) { + /* Seems stuck but waiting to time out */ + return; + } + + nvgpu_err(g, "Job on channel %d timed out", + ch->chid); + + /* force reset calls gk20a_debug_dump but not this */ + if (ch->timeout.debug_dump) + gk20a_gr_debug_dump(g); + + g->ops.fifo.force_reset_ch(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, + ch->timeout.debug_dump); +} + +/** + * Test if the per-channel watchdog is on; check the timeout in that case. + * + * Each channel has an expiration time based watchdog. The timer is + * (re)initialized in two situations: when a new job is submitted on an idle + * channel and when the timeout is checked but progress is detected. The + * watchdog timeout limit is a coarse sliding window. + * + * The timeout is stopped (disabled) after the last job in a row finishes + * and marks the channel idle. 
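+ *
+ * The check itself runs from the channel worker thread, which polls all
+ * live channels roughly every 100 ms (see gk20a_channel_poll_worker()).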
+ */ +static void gk20a_channel_timeout_check(struct channel_gk20a *ch) +{ + bool running; + + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); + running = ch->timeout.running; + nvgpu_raw_spinlock_release(&ch->timeout.lock); + + if (running) + gk20a_channel_timeout_handler(ch); +} + +/** + * Loop every living channel, check timeouts and handle stuck channels. + */ +static void gk20a_channel_poll_timeouts(struct gk20a *g) +{ + unsigned int chid; + + + for (chid = 0; chid < g->fifo.num_channels; chid++) { + struct channel_gk20a *ch = &g->fifo.channel[chid]; + + if (gk20a_channel_get(ch)) { + gk20a_channel_timeout_check(ch); + gk20a_channel_put(ch); + } + } +} + +/* + * Process one scheduled work item for this channel. Currently, the only thing + * the worker does is job cleanup handling. + */ +static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch) +{ + nvgpu_log_fn(ch->g, " "); + + gk20a_channel_clean_up_jobs(ch, true); + + /* ref taken when enqueued */ + gk20a_channel_put(ch); +} + +/** + * Tell the worker that one more work needs to be done. + * + * Increase the work counter to synchronize the worker with the new work. Wake + * up the worker. If the worker was already running, it will handle this work + * before going to sleep. + */ +static int __gk20a_channel_worker_wakeup(struct gk20a *g) +{ + int put; + + nvgpu_log_fn(g, " "); + + /* + * Currently, the only work type is associated with a lock, which deals + * with any necessary barriers. If a work type with no locking were + * added, a nvgpu_smp_wmb() would be needed here. See + * ..worker_pending() for a pair. + */ + + put = nvgpu_atomic_inc_return(&g->channel_worker.put); + nvgpu_cond_signal_interruptible(&g->channel_worker.wq); + + return put; +} + +/** + * Test if there is some work pending. + * + * This is a pair for __gk20a_channel_worker_wakeup to be called from the + * worker. The worker has an internal work counter which is incremented once + * per finished work item. This is compared with the number of queued jobs, + * which may be channels on the items list or any other types of work. + */ +static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) +{ + bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; + + /* + * This would be the place for a nvgpu_smp_rmb() pairing + * a nvgpu_smp_wmb() for a wakeup if we had any work with + * no implicit barriers caused by locking. + */ + + return pending; +} + +/** + * Process the queued works for the worker thread serially. + * + * Flush all the work items in the queue one by one. This may block timeout + * handling for a short while, as these are serialized. + */ +static void gk20a_channel_worker_process(struct gk20a *g, int *get) +{ + + while (__gk20a_channel_worker_pending(g, *get)) { + struct channel_gk20a *ch = NULL; + + /* + * If a channel is on the list, it's guaranteed to be handled + * eventually just once. However, the opposite is not true. A + * channel may be being processed if it's on the list or not. + * + * With this, processing channel works should be conservative + * as follows: it's always safe to look at a channel found in + * the list, and if someone enqueues the channel, it will be + * handled eventually, even if it's being handled at the same + * time. A channel is on the list only once; multiple calls to + * enqueue are harmless. 
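+ *
+ * (gk20a_channel_worker_enqueue() skips channels whose worker_item is
+ * already linked, which is what makes repeated enqueues harmless.)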
+ */ + nvgpu_spinlock_acquire(&g->channel_worker.items_lock); + if (!nvgpu_list_empty(&g->channel_worker.items)) { + ch = nvgpu_list_first_entry(&g->channel_worker.items, + channel_gk20a, + worker_item); + nvgpu_list_del(&ch->worker_item); + } + nvgpu_spinlock_release(&g->channel_worker.items_lock); + + if (!ch) { + /* + * Woke up for some other reason, but there are no + * other reasons than a channel added in the items list + * currently, so warn and ack the message. + */ + nvgpu_warn(g, "Spurious worker event!"); + ++*get; + break; + } + + gk20a_channel_worker_process_ch(ch); + ++*get; + } +} + +/* + * Look at channel states periodically, until canceled. Abort timed out + * channels serially. Process all work items found in the queue. + */ +static int gk20a_channel_poll_worker(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct gk20a_worker *worker = &g->channel_worker; + unsigned long watchdog_interval = 100; /* milliseconds */ + struct nvgpu_timeout timeout; + int get = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_timeout_init(g, &timeout, watchdog_interval, + NVGPU_TIMER_CPU_TIMER); + while (!nvgpu_thread_should_stop(&worker->poll_task)) { + int ret; + + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &worker->wq, + __gk20a_channel_worker_pending(g, get), + watchdog_interval); + + if (ret == 0) + gk20a_channel_worker_process(g, &get); + + if (nvgpu_timeout_peek_expired(&timeout)) { + gk20a_channel_poll_timeouts(g); + nvgpu_timeout_init(g, &timeout, watchdog_interval, + NVGPU_TIMER_CPU_TIMER); + } + } + return 0; +} + +static int __nvgpu_channel_worker_start(struct gk20a *g) +{ + char thread_name[64]; + int err = 0; + + if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) + return err; + + nvgpu_mutex_acquire(&g->channel_worker.start_lock); + + /* + * We don't want to grab a mutex on every channel update so we check + * again if the worker has been initialized before creating a new thread + */ + + /* + * Mutexes have implicit barriers, so there is no risk of a thread + * having a stale copy of the poll_task variable as the call to + * thread_is_running is volatile + */ + + if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) { + nvgpu_mutex_release(&g->channel_worker.start_lock); + return err; + } + + snprintf(thread_name, sizeof(thread_name), + "nvgpu_channel_poll_%s", g->name); + + err = nvgpu_thread_create(&g->channel_worker.poll_task, g, + gk20a_channel_poll_worker, thread_name); + + nvgpu_mutex_release(&g->channel_worker.start_lock); + return err; +} +/** + * Initialize the channel worker's metadata and start the background thread. + */ +int nvgpu_channel_worker_init(struct gk20a *g) +{ + int err; + + nvgpu_atomic_set(&g->channel_worker.put, 0); + nvgpu_cond_init(&g->channel_worker.wq); + nvgpu_init_list_node(&g->channel_worker.items); + nvgpu_spinlock_init(&g->channel_worker.items_lock); + err = nvgpu_mutex_init(&g->channel_worker.start_lock); + if (err) + goto error_check; + + err = __nvgpu_channel_worker_start(g); +error_check: + if (err) { + nvgpu_err(g, "failed to start channel poller thread"); + return err; + } + return 0; +} + +void nvgpu_channel_worker_deinit(struct gk20a *g) +{ + nvgpu_mutex_acquire(&g->channel_worker.start_lock); + nvgpu_thread_stop(&g->channel_worker.poll_task); + nvgpu_mutex_release(&g->channel_worker.start_lock); +} + +/** + * Append a channel to the worker's list, if not there already. + * + * The worker thread processes work items (channels in its work list) and polls + * for other things. 
This adds @ch to the end of the list and wakes the worker + * up immediately. If the channel already existed in the list, it's not added, + * because in that case it has been scheduled already but has not yet been + * processed. + */ +static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch) +{ + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + /* + * Warn if worker thread cannot run + */ + if (WARN_ON(__nvgpu_channel_worker_start(g))) { + nvgpu_warn(g, "channel worker cannot run!"); + return; + } + + /* + * Ref released when this item gets processed. The caller should hold + * one ref already, so normally shouldn't fail, but the channel could + * end up being freed between the time the caller got its reference and + * the time we end up here (e.g., if the client got killed); if so, just + * return. + */ + if (!gk20a_channel_get(ch)) { + nvgpu_info(g, "cannot get ch ref for worker!"); + return; + } + + nvgpu_spinlock_acquire(&g->channel_worker.items_lock); + if (!nvgpu_list_empty(&ch->worker_item)) { + /* + * Already queued, so will get processed eventually. + * The worker is probably awake already. + */ + nvgpu_spinlock_release(&g->channel_worker.items_lock); + gk20a_channel_put(ch); + return; + } + nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items); + nvgpu_spinlock_release(&g->channel_worker.items_lock); + + __gk20a_channel_worker_wakeup(g); +} + +int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + struct gk20a *g = c->g; + + if (!e) + return 0; + + if (e->valid) { + /* read the entry's valid flag before reading its contents */ + nvgpu_smp_rmb(); + if ((q->get != e->off) && e->off != 0) + nvgpu_err(g, "requests out-of-order, ch=%d", + c->chid); + q->get = e->off + e->size; + } + + free_priv_cmdbuf(c, e); + + return 0; +} + +int gk20a_channel_add_job(struct channel_gk20a *c, + struct channel_gk20a_job *job, + bool skip_buffer_refcounting) +{ + struct vm_gk20a *vm = c->vm; + struct nvgpu_mapped_buf **mapped_buffers = NULL; + int err = 0, num_mapped_buffers = 0; + bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); + + if (!skip_buffer_refcounting) { + err = nvgpu_vm_get_buffers(vm, &mapped_buffers, + &num_mapped_buffers); + if (err) + return err; + } + + /* + * Ref to hold the channel open during the job lifetime. This is + * released by job cleanup launched via syncpt or sema interrupt. + */ + c = gk20a_channel_get(c); + + if (c) { + job->num_mapped_buffers = num_mapped_buffers; + job->mapped_buffers = mapped_buffers; + + gk20a_channel_timeout_start(c); + + if (!pre_alloc_enabled) + channel_gk20a_joblist_lock(c); + + /* + * ensure all pending write complete before adding to the list. + * see corresponding nvgpu_smp_rmb in + * gk20a_channel_clean_up_jobs() + */ + nvgpu_smp_wmb(); + channel_gk20a_joblist_add(c, job); + + if (!pre_alloc_enabled) + channel_gk20a_joblist_unlock(c); + } else { + err = -ETIMEDOUT; + goto err_put_buffers; + } + + return 0; + +err_put_buffers: + nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); + + return err; +} + +/** + * Clean up job resources for further jobs to use. + * @clean_all: If true, process as many jobs as possible, otherwise just one. + * + * Loop all jobs from the joblist until a pending job is found, or just one if + * clean_all is not set. Pending jobs are detected from the job's post fence, + * so this is only done for jobs that have job tracking resources. 
Free all + * per-job memory for completed jobs; in case of preallocated resources, this + * opens up slots for new jobs to be submitted. + */ +void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, + bool clean_all) +{ + struct vm_gk20a *vm; + struct channel_gk20a_job *job; + struct gk20a *g; + int job_finished = 0; + bool watchdog_on = false; + + c = gk20a_channel_get(c); + if (!c) + return; + + if (!c->g->power_on) { /* shutdown case */ + gk20a_channel_put(c); + return; + } + + vm = c->vm; + g = c->g; + + /* + * If !clean_all, we're in a condition where watchdog isn't supported + * anyway (this would be a no-op). + */ + if (clean_all) + watchdog_on = gk20a_channel_timeout_stop(c); + + /* Synchronize with abort cleanup that needs the jobs. */ + nvgpu_mutex_acquire(&c->joblist.cleanup_lock); + + while (1) { + bool completed; + + channel_gk20a_joblist_lock(c); + if (channel_gk20a_joblist_is_empty(c)) { + /* + * No jobs in flight, timeout will remain stopped until + * new jobs are submitted. + */ + channel_gk20a_joblist_unlock(c); + break; + } + + /* + * ensure that all subsequent reads occur after checking + * that we have a valid node. see corresponding nvgpu_smp_wmb in + * gk20a_channel_add_job(). + */ + nvgpu_smp_rmb(); + job = channel_gk20a_joblist_peek(c); + channel_gk20a_joblist_unlock(c); + + completed = gk20a_fence_is_expired(job->post_fence); + if (!completed) { + /* + * The watchdog eventually sees an updated gp_get if + * something happened in this loop. A new job can have + * been submitted between the above call to stop and + * this - in that case, this is a no-op and the new + * later timeout is still used. + */ + if (clean_all && watchdog_on) + gk20a_channel_timeout_continue(c); + break; + } + + WARN_ON(!c->sync); + + if (c->sync) { + if (c->has_os_fence_framework_support && + g->os_channel.os_fence_framework_inst_exists(c)) + g->os_channel.signal_os_fence_framework(c); + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_mutex_acquire(&c->sync_lock); + if (nvgpu_atomic_dec_and_test( + &c->sync->refcount) && + g->aggressive_sync_destroy) { + gk20a_channel_sync_destroy(c->sync, + false); + c->sync = NULL; + } + nvgpu_mutex_release(&c->sync_lock); + } + } + + if (job->num_mapped_buffers) + nvgpu_vm_put_buffers(vm, job->mapped_buffers, + job->num_mapped_buffers); + + /* Remove job from channel's job list before we close the + * fences, to prevent other callers (gk20a_channel_abort) from + * trying to dereference post_fence when it no longer exists. + */ + channel_gk20a_joblist_lock(c); + channel_gk20a_joblist_delete(c, job); + channel_gk20a_joblist_unlock(c); + + /* Close the fence (this will unref the semaphore and release + * it to the pool). */ + gk20a_fence_put(job->post_fence); + + /* Free the private command buffers (wait_cmd first and + * then incr_cmd i.e. order of allocation) */ + gk20a_free_priv_cmdbuf(c, job->wait_cmd); + gk20a_free_priv_cmdbuf(c, job->incr_cmd); + + /* another bookkeeping taken in add_job. caller must hold a ref + * so this wouldn't get freed here. */ + gk20a_channel_put(c); + + /* + * ensure all pending writes complete before freeing up the job. + * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). + */ + nvgpu_smp_wmb(); + + channel_gk20a_free_job(c, job); + job_finished = 1; + + /* + * Deterministic channels have a channel-wide power reference; + * for others, there's one per submit. + */ + if (!c->deterministic) + gk20a_idle(g); + + if (!clean_all) { + /* Timeout isn't supported here so don't touch it. 
*/ + break; + } + } + + nvgpu_mutex_release(&c->joblist.cleanup_lock); + + if (job_finished && g->os_channel.work_completion_signal) + g->os_channel.work_completion_signal(c); + + gk20a_channel_put(c); +} + +/** + * Schedule a job cleanup work on this channel to free resources and to signal + * about completion. + * + * Call this when there has been an interrupt about finished jobs, or when job + * cleanup needs to be performed, e.g., when closing a channel. This is always + * safe to call even if there is nothing to clean up. Any visible actions on + * jobs just before calling this are guaranteed to be processed. + */ +void gk20a_channel_update(struct channel_gk20a *c) +{ + if (!c->g->power_on) { /* shutdown case */ + return; + } + + trace_gk20a_channel_update(c->chid); + /* A queued channel is always checked for job cleanup. */ + gk20a_channel_worker_enqueue(c); +} + +/* + * Stop deterministic channel activity for do_idle() when power needs to go off + * momentarily but deterministic channels keep power refs for potentially a + * long time. + * + * Takes write access on g->deterministic_busy. + * + * Must be paired with gk20a_channel_deterministic_unidle(). + */ +void gk20a_channel_deterministic_idle(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + /* Grab exclusive access to the hw to block new submits */ + nvgpu_rwsem_down_write(&g->deterministic_busy); + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + if (!gk20a_channel_get(ch)) + continue; + + if (ch->deterministic && !ch->deterministic_railgate_allowed) { + /* + * Drop the power ref taken when setting deterministic + * flag. deterministic_unidle will put this and the + * channel ref back. If railgate is allowed separately + * for this channel, the power ref has already been put + * away. + * + * Hold the channel ref: it must not get freed in + * between. A race could otherwise result in lost + * gk20a_busy() via unidle, and in unbalanced + * gk20a_idle() via closing the channel. + */ + gk20a_idle(g); + } else { + /* Not interesting, carry on. */ + gk20a_channel_put(ch); + } + } +} + +/* + * Allow deterministic channel activity again for do_unidle(). + * + * This releases write access on g->deterministic_busy. + */ +void gk20a_channel_deterministic_unidle(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + if (!gk20a_channel_get(ch)) + continue; + + /* + * Deterministic state changes inside deterministic_busy lock, + * which we took in deterministic_idle. 
+ */ + if (ch->deterministic && !ch->deterministic_railgate_allowed) { + if (gk20a_busy(g)) + nvgpu_err(g, "cannot busy() again!"); + /* Took this in idle() */ + gk20a_channel_put(ch); + } + + gk20a_channel_put(ch); + } + + /* Release submits, new deterministic channels and frees */ + nvgpu_rwsem_up_write(&g->deterministic_busy); +} + +int gk20a_init_channel_support(struct gk20a *g, u32 chid) +{ + struct channel_gk20a *c = g->fifo.channel+chid; + int err; + + c->g = NULL; + c->chid = chid; + nvgpu_atomic_set(&c->bound, false); + nvgpu_spinlock_init(&c->ref_obtain_lock); + nvgpu_atomic_set(&c->ref_count, 0); + c->referenceable = false; + nvgpu_cond_init(&c->ref_count_dec_wq); + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + nvgpu_spinlock_init(&c->ref_actions_lock); +#endif + nvgpu_spinlock_init(&c->joblist.dynamic.lock); + nvgpu_raw_spinlock_init(&c->timeout.lock); + + nvgpu_init_list_node(&c->joblist.dynamic.jobs); + nvgpu_init_list_node(&c->dbg_s_list); + nvgpu_init_list_node(&c->worker_item); + + err = nvgpu_mutex_init(&c->ioctl_lock); + if (err) + return err; + err = nvgpu_mutex_init(&c->joblist.cleanup_lock); + if (err) + goto fail_1; + err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + if (err) + goto fail_2; + err = nvgpu_mutex_init(&c->sync_lock); + if (err) + goto fail_3; +#if defined(CONFIG_GK20A_CYCLE_STATS) + err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); + if (err) + goto fail_4; + err = nvgpu_mutex_init(&c->cs_client_mutex); + if (err) + goto fail_5; +#endif + err = nvgpu_mutex_init(&c->dbg_s_lock); + if (err) + goto fail_6; + + nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); + + return 0; + +fail_6: +#if defined(CONFIG_GK20A_CYCLE_STATS) + nvgpu_mutex_destroy(&c->cs_client_mutex); +fail_5: + nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); +fail_4: +#endif + nvgpu_mutex_destroy(&c->sync_lock); +fail_3: + nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); +fail_2: + nvgpu_mutex_destroy(&c->joblist.cleanup_lock); +fail_1: + nvgpu_mutex_destroy(&c->ioctl_lock); + + return err; +} + +/* in this context the "channel" is the host1x channel which + * maps to *all* gk20a channels */ +int gk20a_channel_suspend(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + u32 active_runlist_ids = 0; + + nvgpu_log_fn(g, " "); + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + if (gk20a_channel_get(ch)) { + nvgpu_log_info(g, "suspend channel %d", chid); + /* disable channel */ + gk20a_disable_channel_tsg(g, ch); + /* preempt the channel */ + gk20a_fifo_preempt(g, ch); + /* wait for channel update notifiers */ + if (g->os_channel.work_completion_cancel_sync) + g->os_channel.work_completion_cancel_sync(ch); + + channels_in_use = true; + + active_runlist_ids |= BIT(ch->runlist_id); + + gk20a_channel_put(ch); + } + } + + if (channels_in_use) { + gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true); + + for (chid = 0; chid < f->num_channels; chid++) { + if (gk20a_channel_get(&f->channel[chid])) { + g->ops.fifo.unbind_channel(&f->channel[chid]); + gk20a_channel_put(&f->channel[chid]); + } + } + } + + nvgpu_log_fn(g, "done"); + return 0; +} + +int gk20a_channel_resume(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + u32 active_runlist_ids = 0; + + nvgpu_log_fn(g, " "); + + for (chid = 0; chid < f->num_channels; chid++) { + if (gk20a_channel_get(&f->channel[chid])) { + nvgpu_log_info(g, "resume channel 
%d", chid); + g->ops.fifo.bind_channel(&f->channel[chid]); + channels_in_use = true; + active_runlist_ids |= BIT(f->channel[chid].runlist_id); + gk20a_channel_put(&f->channel[chid]); + } + } + + if (channels_in_use) + gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true); + + nvgpu_log_fn(g, "done"); + return 0; +} + +void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + nvgpu_log_fn(g, " "); + + /* + * Ensure that all pending writes are actually done before trying to + * read semaphore values from DRAM. + */ + g->ops.mm.fb_flush(g); + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *c = g->fifo.channel+chid; + if (gk20a_channel_get(c)) { + if (nvgpu_atomic_read(&c->bound)) { + nvgpu_cond_broadcast_interruptible( + &c->semaphore_wq); + if (post_events) { + if (gk20a_is_channel_marked_as_tsg(c)) { + struct tsg_gk20a *tsg = + &g->fifo.tsg[c->tsgid]; + + g->ops.fifo.post_event_id(tsg, + NVGPU_EVENT_ID_BLOCKING_SYNC); + } + } + /* + * Only non-deterministic channels get the + * channel_update callback. We don't allow + * semaphore-backed syncs for these channels + * anyways, since they have a dependency on + * the sync framework. + * If deterministic channels are receiving a + * semaphore wakeup, it must be for a + * user-space managed + * semaphore. + */ + if (!c->deterministic) + gk20a_channel_update(c); + } + gk20a_channel_put(c); + } + } +} diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c deleted file mode 100644 index 77458917..00000000 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ /dev/null @@ -1,2262 +0,0 @@ -/* - * GK20A Graphics channel - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a.h" -#include "dbg_gpu_gk20a.h" -#include "fence_gk20a.h" - -static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); -static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c); - -static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); -static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); - -static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); - -static void channel_gk20a_joblist_add(struct channel_gk20a *c, - struct channel_gk20a_job *job); -static void channel_gk20a_joblist_delete(struct channel_gk20a *c, - struct channel_gk20a_job *job); -static struct channel_gk20a_job *channel_gk20a_joblist_peek( - struct channel_gk20a *c); - -/* allocate GPU channel */ -static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) -{ - struct channel_gk20a *ch = NULL; - struct gk20a *g = f->g; - - nvgpu_mutex_acquire(&f->free_chs_mutex); - if (!nvgpu_list_empty(&f->free_chs)) { - ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, - free_chs); - nvgpu_list_del(&ch->free_chs); - WARN_ON(nvgpu_atomic_read(&ch->ref_count)); - WARN_ON(ch->referenceable); - f->used_channels++; - } - nvgpu_mutex_release(&f->free_chs_mutex); - - if (g->aggressive_sync_destroy_thresh && - (f->used_channels > - g->aggressive_sync_destroy_thresh)) - g->aggressive_sync_destroy = true; - - return ch; -} - -static void free_channel(struct fifo_gk20a *f, - struct channel_gk20a *ch) -{ - struct gk20a *g = f->g; - - trace_gk20a_release_used_channel(ch->chid); - /* refcount is zero here and channel is in a freed/dead state */ - nvgpu_mutex_acquire(&f->free_chs_mutex); - /* add to head to increase visibility of timing-related bugs */ - nvgpu_list_add(&ch->free_chs, &f->free_chs); - f->used_channels--; - nvgpu_mutex_release(&f->free_chs_mutex); - - /* - * On teardown it is not possible to dereference platform, but ignoring - * this is fine then because no new channels would be created. 
- */ - if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { - if (g->aggressive_sync_destroy_thresh && - (f->used_channels < - g->aggressive_sync_destroy_thresh)) - g->aggressive_sync_destroy = false; - } -} - -int channel_gk20a_commit_va(struct channel_gk20a *c) -{ - struct gk20a *g = c->g; - - nvgpu_log_fn(g, " "); - - g->ops.mm.init_inst_block(&c->inst_block, c->vm, - c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]); - - return 0; -} - -int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, - unsigned int timeslice_period, - unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale) -{ - unsigned int value = scale_ptimer(timeslice_period, - ptimer_scalingfactor10x(g->ptimer_src_freq)); - unsigned int shift = 0; - - /* value field is 8 bits long */ - while (value >= 1 << 8) { - value >>= 1; - shift++; - } - - /* time slice register is only 18bits long */ - if ((value << shift) >= 1<<19) { - nvgpu_err(g, "Requested timeslice value is clamped to 18 bits\n"); - value = 255; - shift = 10; - } - - *__timeslice_timeout = value; - *__timeslice_scale = shift; - - return 0; -} - -int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) -{ - return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true); -} - -int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) -{ - struct tsg_gk20a *tsg; - - if (gk20a_is_channel_marked_as_tsg(ch)) { - tsg = &g->fifo.tsg[ch->tsgid]; - g->ops.fifo.enable_tsg(tsg); - } else { - g->ops.fifo.enable_channel(ch); - } - - return 0; -} - -int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) -{ - struct tsg_gk20a *tsg; - - if (gk20a_is_channel_marked_as_tsg(ch)) { - tsg = &g->fifo.tsg[ch->tsgid]; - g->ops.fifo.disable_tsg(tsg); - } else { - g->ops.fifo.disable_channel(ch); - } - - return 0; -} - -void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) -{ - /* synchronize with actual job cleanup */ - nvgpu_mutex_acquire(&ch->joblist.cleanup_lock); - - /* ensure no fences are pending */ - nvgpu_mutex_acquire(&ch->sync_lock); - if (ch->sync) - ch->sync->set_min_eq_max(ch->sync); - if (ch->user_sync) - ch->user_sync->set_safe_state(ch->user_sync); - nvgpu_mutex_release(&ch->sync_lock); - - nvgpu_mutex_release(&ch->joblist.cleanup_lock); - - /* - * When closing the channel, this scheduled update holds one ref which - * is waited for before advancing with freeing. 
- */ - gk20a_channel_update(ch); -} - -void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) -{ - nvgpu_log_fn(ch->g, " "); - - if (gk20a_is_channel_marked_as_tsg(ch)) - return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt); - - /* make sure new kickoffs are prevented */ - ch->has_timedout = true; - - ch->g->ops.fifo.disable_channel(ch); - - if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch)) - ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); - - if (ch->g->ops.fifo.ch_abort_clean_up) - ch->g->ops.fifo.ch_abort_clean_up(ch); -} - -int gk20a_wait_channel_idle(struct channel_gk20a *ch) -{ - bool channel_idle = false; - struct nvgpu_timeout timeout; - - nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g), - NVGPU_TIMER_CPU_TIMER); - - do { - channel_gk20a_joblist_lock(ch); - channel_idle = channel_gk20a_joblist_is_empty(ch); - channel_gk20a_joblist_unlock(ch); - if (channel_idle) - break; - - nvgpu_usleep_range(1000, 3000); - } while (!nvgpu_timeout_expired(&timeout)); - - if (!channel_idle) { - nvgpu_err(ch->g, "jobs not freed for channel %d", - ch->chid); - return -EBUSY; - } - - return 0; -} - -void gk20a_disable_channel(struct channel_gk20a *ch) -{ - gk20a_channel_abort(ch, true); - channel_gk20a_update_runlist(ch, false); -} - -void gk20a_wait_until_counter_is_N( - struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, - struct nvgpu_cond *c, const char *caller, const char *counter_name) -{ - while (true) { - if (NVGPU_COND_WAIT( - c, - nvgpu_atomic_read(counter) == wait_value, - 5000) == 0) - break; - - nvgpu_warn(ch->g, - "%s: channel %d, still waiting, %s left: %d, waiting for: %d", - caller, ch->chid, counter_name, - nvgpu_atomic_read(counter), wait_value); - - gk20a_channel_dump_ref_actions(ch); - } -} - -/* call ONLY when no references to the channel exist: after the last put */ -static void gk20a_free_channel(struct channel_gk20a *ch, bool force) -{ - struct gk20a *g = ch->g; - struct fifo_gk20a *f = &g->fifo; - struct gr_gk20a *gr = &g->gr; - struct vm_gk20a *ch_vm = ch->vm; - unsigned long timeout = gk20a_get_gr_idle_timeout(g); - struct dbg_session_gk20a *dbg_s; - struct dbg_session_data *session_data, *tmp_s; - struct dbg_session_channel_data *ch_data, *tmp; - int err; - - nvgpu_log_fn(g, " "); - - WARN_ON(ch->g == NULL); - - trace_gk20a_free_channel(ch->chid); - - if (g->os_channel.close) - g->os_channel.close(ch); - - /* - * Disable channel/TSG and unbind here. 
This should not be executed if - * HW access is not available during shutdown/removal path as it will - * trigger a timeout - */ - if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { - /* abort channel and remove from runlist */ - if (gk20a_is_channel_marked_as_tsg(ch)) { - err = gk20a_tsg_unbind_channel(ch); - if (err) - nvgpu_err(g, - "failed to unbind channel %d from TSG", - ch->chid); - } else { - /* - * Channel is already unbound from TSG by User with - * explicit call - * Nothing to do here in that case - */ - } - } - /* wait until there's only our ref to the channel */ - if (!force) - gk20a_wait_until_counter_is_N( - ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, - __func__, "references"); - - /* wait until all pending interrupts for recently completed - * jobs are handled */ - nvgpu_wait_for_deferred_interrupts(g); - - /* prevent new refs */ - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - if (!ch->referenceable) { - nvgpu_spinlock_release(&ch->ref_obtain_lock); - nvgpu_err(ch->g, - "Extra %s() called to channel %u", - __func__, ch->chid); - return; - } - ch->referenceable = false; - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - /* matches with the initial reference in gk20a_open_new_channel() */ - nvgpu_atomic_dec(&ch->ref_count); - - /* wait until no more refs to the channel */ - if (!force) - gk20a_wait_until_counter_is_N( - ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, - __func__, "references"); - - /* if engine reset was deferred, perform it now */ - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - if (g->fifo.deferred_reset_pending) { - nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" - " deferred, running now"); - /* if lock is already taken, a reset is taking place - so no need to repeat */ - if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { - gk20a_fifo_deferred_reset(g, ch); - nvgpu_mutex_release(&g->fifo.gr_reset_mutex); - } - } - nvgpu_mutex_release(&f->deferred_reset_mutex); - - if (!gk20a_channel_as_bound(ch)) - goto unbind; - - nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", - timeout); - -#ifdef CONFIG_GK20A_CTXSW_TRACE - if (g->ops.fecs_trace.unbind_channel && !ch->vpr) - g->ops.fecs_trace.unbind_channel(g, ch); -#endif - - if(g->ops.fifo.free_channel_ctx_header) - g->ops.fifo.free_channel_ctx_header(ch); - - if (ch->usermode_submit_enabled) { - gk20a_channel_free_usermode_buffers(ch); - ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) + - ch->chid * f->userd_entry_size; - ch->usermode_submit_enabled = false; - } - - gk20a_gr_flush_channel_tlb(gr); - - nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem); - nvgpu_big_free(g, ch->gpfifo.pipe); - memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); - - channel_gk20a_free_priv_cmdbuf(ch); - - /* sync must be destroyed before releasing channel vm */ - nvgpu_mutex_acquire(&ch->sync_lock); - if (ch->sync) { - gk20a_channel_sync_destroy(ch->sync, false); - ch->sync = NULL; - } - if (ch->user_sync) { - /* - * Set user managed syncpoint to safe state - * But it's already done if channel has timedout - */ - if (ch->has_timedout) - gk20a_channel_sync_destroy(ch->user_sync, false); - else - gk20a_channel_sync_destroy(ch->user_sync, true); - ch->user_sync = NULL; - } - nvgpu_mutex_release(&ch->sync_lock); - - /* - * free the channel used semaphore index. - * we need to do this before releasing the address space, - * as the semaphore pool might get freed after that point. 
- */ - if (ch->hw_sema) - nvgpu_semaphore_free_hw_sema(ch); - - /* - * When releasing the channel we unbind the VM - so release the ref. - */ - nvgpu_vm_put(ch_vm); - - /* make sure we don't have deferred interrupts pending that - * could still touch the channel */ - nvgpu_wait_for_deferred_interrupts(g); - -unbind: - g->ops.fifo.unbind_channel(ch); - g->ops.fifo.free_inst(g, ch); - - /* put back the channel-wide submit ref from init */ - if (ch->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - ch->deterministic = false; - if (!ch->deterministic_railgate_allowed) - gk20a_idle(g); - ch->deterministic_railgate_allowed = false; - - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - ch->vpr = false; - ch->vm = NULL; - - WARN_ON(ch->sync); - - /* unlink all debug sessions */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - nvgpu_list_for_each_entry_safe(session_data, tmp_s, - &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { - dbg_s = session_data->dbg_s; - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) { - if (ch_data->chid == ch->chid) - ch_data->unbind_single_channel(dbg_s, ch_data); - } - nvgpu_mutex_release(&dbg_s->ch_list_lock); - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - /* free pre-allocated resources, if applicable */ - if (channel_gk20a_is_prealloc_enabled(ch)) - channel_gk20a_free_prealloc_resources(ch); - -#if GK20A_CHANNEL_REFCOUNT_TRACKING - memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); - ch->ref_actions_put = 0; -#endif - - /* make sure we catch accesses of unopened channels in case - * there's non-refcounted channel pointers hanging around */ - ch->g = NULL; - nvgpu_smp_wmb(); - - /* ALWAYS last */ - free_channel(f, ch); -} - -static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - size_t i, get; - s64 now = nvgpu_current_time_ms(); - s64 prev = 0; - struct gk20a *g = ch->g; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", - ch->chid, nvgpu_atomic_read(&ch->ref_count)); - - /* start at the oldest possible entry. put is next insertion point */ - get = ch->ref_actions_put; - - /* - * If the buffer is not full, this will first loop to the oldest entry, - * skipping not-yet-initialized entries. There is no ref_actions_get. - */ - for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { - struct channel_gk20a_ref_action *act = &ch->ref_actions[get]; - - if (act->trace.nr_entries) { - nvgpu_info(g, - "%s ref %zu steps ago (age %lld ms, diff %lld ms)", - act->type == channel_gk20a_ref_action_get - ? 
"GET" : "PUT", - GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, - now - act->timestamp_ms, - act->timestamp_ms - prev); - - print_stack_trace(&act->trace, 0); - prev = act->timestamp_ms; - } - - get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; - } - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, - enum channel_gk20a_ref_action_type type) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - struct channel_gk20a_ref_action *act; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - act = &ch->ref_actions[ch->ref_actions_put]; - act->type = type; - act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; - act->trace.nr_entries = 0; - act->trace.skip = 3; /* onwards from the caller of this */ - act->trace.entries = act->trace_entries; - save_stack_trace(&act->trace); - act->timestamp_ms = nvgpu_current_time_ms(); - ch->ref_actions_put = (ch->ref_actions_put + 1) % - GK20A_CHANNEL_REFCOUNT_TRACKING; - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -/* Try to get a reference to the channel. Return nonzero on success. If fails, - * the channel is dead or being freed elsewhere and you must not touch it. - * - * Always when a channel_gk20a pointer is seen and about to be used, a - * reference must be held to it - either by you or the caller, which should be - * documented well or otherwise clearly seen. This usually boils down to the - * file from ioctls directly, or an explicit get in exception handlers when the - * channel is found by a chid. - * - * Most global functions in this file require a reference to be held by the - * caller. - */ -struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, - const char *caller) { - struct channel_gk20a *ret; - - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - - if (likely(ch->referenceable)) { - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); - nvgpu_atomic_inc(&ch->ref_count); - ret = ch; - } else - ret = NULL; - - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - if (ret) - trace_gk20a_channel_get(ch->chid, caller); - - return ret; -} - -void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) -{ - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); - trace_gk20a_channel_put(ch->chid, caller); - nvgpu_atomic_dec(&ch->ref_count); - nvgpu_cond_broadcast(&ch->ref_count_dec_wq); - - /* More puts than gets. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); - - /* Also, more puts than gets. ref_count can go to 0 only if - * the channel is closing. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); -} - -void gk20a_channel_close(struct channel_gk20a *ch) -{ - gk20a_free_channel(ch, false); -} - -/* - * Be careful with this - it is meant for terminating channels when we know the - * driver is otherwise dying. Ref counts and the like are ignored by this - * version of the cleanup. 
- */ -void __gk20a_channel_kill(struct channel_gk20a *ch) -{ - gk20a_free_channel(ch, true); -} - -struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, - s32 runlist_id, - bool is_privileged_channel, - pid_t pid, pid_t tid) -{ - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch; - - /* compatibility with existing code */ - if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) { - runlist_id = gk20a_fifo_get_gr_runlist_id(g); - } - - nvgpu_log_fn(g, " "); - - ch = allocate_channel(f); - if (ch == NULL) { - /* TBD: we want to make this virtualizable */ - nvgpu_err(g, "out of hw chids"); - return NULL; - } - - trace_gk20a_open_new_channel(ch->chid); - - BUG_ON(ch->g); - ch->g = g; - - /* Runlist for the channel */ - ch->runlist_id = runlist_id; - - /* Channel privilege level */ - ch->is_privileged_channel = is_privileged_channel; - - ch->pid = tid; - ch->tgid = pid; /* process granularity for FECS traces */ - - if (g->ops.fifo.alloc_inst(g, ch)) { - ch->g = NULL; - free_channel(f, ch); - nvgpu_err(g, - "failed to open gk20a channel, out of inst mem"); - return NULL; - } - - /* now the channel is in a limbo out of the free list but not marked as - * alive and used (i.e. get-able) yet */ - - /* By default, channel is regular (non-TSG) channel */ - ch->tsgid = NVGPU_INVALID_TSG_ID; - - /* clear ctxsw timeout counter and update timestamp */ - ch->timeout_accumulated_ms = 0; - ch->timeout_gpfifo_get = 0; - /* set gr host default timeout */ - ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); - ch->timeout_debug_dump = true; - ch->has_timedout = false; - - /* init kernel watchdog timeout */ - ch->timeout.enabled = true; - ch->timeout.limit_ms = g->ch_wdt_timeout_ms; - ch->timeout.debug_dump = true; - - ch->obj_class = 0; - ch->subctx_id = 0; - ch->runqueue_sel = 0; - - ch->mmu_nack_handled = false; - - /* The channel is *not* runnable at this point. It still needs to have - * an address space bound and allocate a gpfifo and grctx. */ - - nvgpu_cond_init(&ch->notifier_wq); - nvgpu_cond_init(&ch->semaphore_wq); - - if (g->os_channel.open) - g->os_channel.open(ch); - - /* Mark the channel alive, get-able, with 1 initial use - * references. The initial reference will be decreased in - * gk20a_free_channel() */ - ch->referenceable = true; - nvgpu_atomic_set(&ch->ref_count, 1); - nvgpu_smp_wmb(); - - return ch; -} - -/* allocate private cmd buffer. - used for inserting commands before/after user submitted buffers. */ -static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) -{ - struct gk20a *g = c->g; - struct vm_gk20a *ch_vm = c->vm; - struct priv_cmd_queue *q = &c->priv_cmd_q; - u32 size; - int err = 0; - - /* - * Compute the amount of priv_cmdbuf space we need. In general the worst - * case is the kernel inserts both a semaphore pre-fence and post-fence. - * Any sync-pt fences will take less memory so we can ignore them for - * now. - * - * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b, - * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10 - * dwords: all the same as an ACQ plus a non-stalling intr which is - * another 2 dwords. - * - * Lastly the number of gpfifo entries per channel is fixed so at most - * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one - * userspace entry, and one post-fence entry). Thus the computation is: - * - * (gpfifo entry number * (2 / 3) * (8 + 10) * 4 bytes. 
- */ - size = roundup_pow_of_two(c->gpfifo.entry_num * - 2 * 18 * sizeof(u32) / 3); - - err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); - if (err) { - nvgpu_err(g, "%s: memory allocation failed", __func__); - goto clean_up; - } - - q->size = q->mem.size / sizeof (u32); - - return 0; - -clean_up: - channel_gk20a_free_priv_cmdbuf(c); - return err; -} - -static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) -{ - struct vm_gk20a *ch_vm = c->vm; - struct priv_cmd_queue *q = &c->priv_cmd_q; - - if (q->size == 0) - return; - - nvgpu_dma_unmap_free(ch_vm, &q->mem); - - memset(q, 0, sizeof(struct priv_cmd_queue)); -} - -/* allocate a cmd buffer with given size. size is number of u32 entries */ -int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, - struct priv_cmd_entry *e) -{ - struct priv_cmd_queue *q = &c->priv_cmd_q; - u32 free_count; - u32 size = orig_size; - - nvgpu_log_fn(c->g, "size %d", orig_size); - - if (!e) { - nvgpu_err(c->g, - "ch %d: priv cmd entry is null", - c->chid); - return -EINVAL; - } - - /* if free space in the end is less than requested, increase the size - * to make the real allocated space start from beginning. */ - if (q->put + size > q->size) - size = orig_size + (q->size - q->put); - - nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", - c->chid, q->get, q->put); - - free_count = (q->size - (q->put - q->get) - 1) % q->size; - - if (size > free_count) - return -EAGAIN; - - e->size = orig_size; - e->mem = &q->mem; - - /* if we have increased size to skip free space in the end, set put - to beginning of cmd buffer (0) + size */ - if (size != orig_size) { - e->off = 0; - e->gva = q->mem.gpu_va; - q->put = orig_size; - } else { - e->off = q->put; - e->gva = q->mem.gpu_va + q->put * sizeof(u32); - q->put = (q->put + orig_size) & (q->size - 1); - } - - /* we already handled q->put + size > q->size so BUG_ON this */ - BUG_ON(q->put > q->size); - - /* - * commit the previous writes before making the entry valid. - * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). - */ - nvgpu_smp_wmb(); - - e->valid = true; - nvgpu_log_fn(c->g, "done"); - - return 0; -} - -/* Don't call this to free an explict cmd entry. - * It doesn't update priv_cmd_queue get/put */ -void free_priv_cmdbuf(struct channel_gk20a *c, - struct priv_cmd_entry *e) -{ - if (channel_gk20a_is_prealloc_enabled(c)) - memset(e, 0, sizeof(struct priv_cmd_entry)); - else - nvgpu_kfree(c->g, e); -} - -int channel_gk20a_alloc_job(struct channel_gk20a *c, - struct channel_gk20a_job **job_out) -{ - int err = 0; - - if (channel_gk20a_is_prealloc_enabled(c)) { - int put = c->joblist.pre_alloc.put; - int get = c->joblist.pre_alloc.get; - - /* - * ensure all subsequent reads happen after reading get. - * see corresponding nvgpu_smp_wmb in - * gk20a_channel_clean_up_jobs() - */ - nvgpu_smp_rmb(); - - if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) - *job_out = &c->joblist.pre_alloc.jobs[put]; - else { - nvgpu_warn(c->g, - "out of job ringbuffer space"); - err = -EAGAIN; - } - } else { - *job_out = nvgpu_kzalloc(c->g, - sizeof(struct channel_gk20a_job)); - if (!*job_out) - err = -ENOMEM; - } - - return err; -} - -void channel_gk20a_free_job(struct channel_gk20a *c, - struct channel_gk20a_job *job) -{ - /* - * In case of pre_allocated jobs, we need to clean out - * the job but maintain the pointers to the priv_cmd_entry, - * since they're inherently tied to the job node. 
- */ - if (channel_gk20a_is_prealloc_enabled(c)) { - struct priv_cmd_entry *wait_cmd = job->wait_cmd; - struct priv_cmd_entry *incr_cmd = job->incr_cmd; - memset(job, 0, sizeof(*job)); - job->wait_cmd = wait_cmd; - job->incr_cmd = incr_cmd; - } else - nvgpu_kfree(c->g, job); -} - -void channel_gk20a_joblist_lock(struct channel_gk20a *c) -{ - if (channel_gk20a_is_prealloc_enabled(c)) - nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); - else - nvgpu_spinlock_acquire(&c->joblist.dynamic.lock); -} - -void channel_gk20a_joblist_unlock(struct channel_gk20a *c) -{ - if (channel_gk20a_is_prealloc_enabled(c)) - nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); - else - nvgpu_spinlock_release(&c->joblist.dynamic.lock); -} - -static struct channel_gk20a_job *channel_gk20a_joblist_peek( - struct channel_gk20a *c) -{ - int get; - struct channel_gk20a_job *job = NULL; - - if (channel_gk20a_is_prealloc_enabled(c)) { - if (!channel_gk20a_joblist_is_empty(c)) { - get = c->joblist.pre_alloc.get; - job = &c->joblist.pre_alloc.jobs[get]; - } - } else { - if (!nvgpu_list_empty(&c->joblist.dynamic.jobs)) - job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs, - channel_gk20a_job, list); - } - - return job; -} - -static void channel_gk20a_joblist_add(struct channel_gk20a *c, - struct channel_gk20a_job *job) -{ - if (channel_gk20a_is_prealloc_enabled(c)) { - c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % - (c->joblist.pre_alloc.length); - } else { - nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs); - } -} - -static void channel_gk20a_joblist_delete(struct channel_gk20a *c, - struct channel_gk20a_job *job) -{ - if (channel_gk20a_is_prealloc_enabled(c)) { - c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % - (c->joblist.pre_alloc.length); - } else { - nvgpu_list_del(&job->list); - } -} - -bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) -{ - if (channel_gk20a_is_prealloc_enabled(c)) { - int get = c->joblist.pre_alloc.get; - int put = c->joblist.pre_alloc.put; - return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); - } - - return nvgpu_list_empty(&c->joblist.dynamic.jobs); -} - -bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) -{ - bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; - - nvgpu_smp_rmb(); - return pre_alloc_enabled; -} - -static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, - unsigned int num_jobs) -{ - unsigned int i; - int err; - size_t size; - struct priv_cmd_entry *entries = NULL; - - if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) - return -EINVAL; - - /* - * pre-allocate the job list. - * since vmalloc take in an unsigned long, we need - * to make sure we don't hit an overflow condition - */ - size = sizeof(struct channel_gk20a_job); - if (num_jobs <= ULONG_MAX / size) - c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g, - num_jobs * size); - if (!c->joblist.pre_alloc.jobs) { - err = -ENOMEM; - goto clean_up; - } - - /* - * pre-allocate 2x priv_cmd_entry for each job up front. 
- * since vmalloc take in an unsigned long, we need - * to make sure we don't hit an overflow condition - */ - size = sizeof(struct priv_cmd_entry); - if (num_jobs <= ULONG_MAX / (size << 1)) - entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size); - if (!entries) { - err = -ENOMEM; - goto clean_up_joblist; - } - - for (i = 0; i < num_jobs; i++) { - c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; - c->joblist.pre_alloc.jobs[i].incr_cmd = - &entries[i + num_jobs]; - } - - /* pre-allocate a fence pool */ - err = gk20a_alloc_fence_pool(c, num_jobs); - if (err) - goto clean_up_priv_cmd; - - c->joblist.pre_alloc.length = num_jobs; - c->joblist.pre_alloc.put = 0; - c->joblist.pre_alloc.get = 0; - - /* - * commit the previous writes before setting the flag. - * see corresponding nvgpu_smp_rmb in - * channel_gk20a_is_prealloc_enabled() - */ - nvgpu_smp_wmb(); - c->joblist.pre_alloc.enabled = true; - - return 0; - -clean_up_priv_cmd: - nvgpu_vfree(c->g, entries); -clean_up_joblist: - nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); -clean_up: - memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); - return err; -} - -static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) -{ - nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd); - nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); - gk20a_free_fence_pool(c); - - /* - * commit the previous writes before disabling the flag. - * see corresponding nvgpu_smp_rmb in - * channel_gk20a_is_prealloc_enabled() - */ - nvgpu_smp_wmb(); - c->joblist.pre_alloc.enabled = false; -} - -int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_args *gpfifo_args) -{ - struct gk20a *g = c->g; - struct vm_gk20a *ch_vm; - u32 gpfifo_size, gpfifo_entry_size; - int err = 0; - unsigned long acquire_timeout; - - gpfifo_size = gpfifo_args->num_entries; - gpfifo_entry_size = nvgpu_get_gpfifo_entry_size(); - - if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR) - c->vpr = true; - - if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - /* - * Railgating isn't deterministic; instead of disallowing - * railgating globally, take a power refcount for this - * channel's lifetime. The gk20a_idle() pair for this happens - * when the channel gets freed. - * - * Deterministic flag and this busy must be atomic within the - * busy lock. - */ - err = gk20a_busy(g); - if (err) { - nvgpu_rwsem_up_read(&g->deterministic_busy); - return err; - } - - c->deterministic = true; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - /* an address space needs to have been bound at this point. 
*/ - if (!gk20a_channel_as_bound(c)) { - nvgpu_err(g, - "not bound to an address space at time of gpfifo" - " allocation."); - err = -EINVAL; - goto clean_up_idle; - } - ch_vm = c->vm; - - if (c->gpfifo.mem.size) { - nvgpu_err(g, "channel %d :" - "gpfifo already allocated", c->chid); - err = -EEXIST; - goto clean_up_idle; - } - - if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) { - if (g->ops.fifo.alloc_usermode_buffers) { - err = g->ops.fifo.alloc_usermode_buffers(c, - gpfifo_args); - if (err) { - nvgpu_err(g, "Usermode buffer alloc failed"); - goto clean_up; - } - c->userd_iova = nvgpu_mem_get_addr(g, - &c->usermode_userd); - c->usermode_submit_enabled = true; - } else { - nvgpu_err(g, "Usermode submit not supported"); - err = -EINVAL; - goto clean_up; - } - } - - err = nvgpu_dma_alloc_map_sys(ch_vm, - gpfifo_size * gpfifo_entry_size, - &c->gpfifo.mem); - if (err) { - nvgpu_err(g, "%s: memory allocation failed", __func__); - goto clean_up_usermode; - } - - if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) { - c->gpfifo.pipe = nvgpu_big_malloc(g, - gpfifo_size * gpfifo_entry_size); - if (!c->gpfifo.pipe) { - err = -ENOMEM; - goto clean_up_unmap; - } - } - - c->gpfifo.entry_num = gpfifo_size; - c->gpfifo.get = c->gpfifo.put = 0; - - nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", - c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); - - g->ops.fifo.setup_userd(c); - - if (!g->aggressive_sync_destroy_thresh) { - nvgpu_mutex_acquire(&c->sync_lock); - c->sync = gk20a_channel_sync_create(c, false); - if (!c->sync) { - err = -ENOMEM; - nvgpu_mutex_release(&c->sync_lock); - goto clean_up_unmap; - } - nvgpu_mutex_release(&c->sync_lock); - - if (g->ops.fifo.resetup_ramfc) { - err = g->ops.fifo.resetup_ramfc(c); - if (err) - goto clean_up_sync; - } - } - - if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled) - acquire_timeout = 0; - else - acquire_timeout = c->timeout.limit_ms; - - err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, - c->gpfifo.entry_num, - acquire_timeout, gpfifo_args->flags); - if (err) - goto clean_up_sync; - - /* TBD: setup engine contexts */ - - if (gpfifo_args->num_inflight_jobs) { - err = channel_gk20a_prealloc_resources(c, - gpfifo_args->num_inflight_jobs); - if (err) - goto clean_up_sync; - } - - err = channel_gk20a_alloc_priv_cmdbuf(c); - if (err) - goto clean_up_prealloc; - - err = channel_gk20a_update_runlist(c, true); - if (err) - goto clean_up_priv_cmd; - - g->ops.fifo.bind_channel(c); - - nvgpu_log_fn(g, "done"); - return 0; - -clean_up_priv_cmd: - channel_gk20a_free_priv_cmdbuf(c); -clean_up_prealloc: - if (gpfifo_args->num_inflight_jobs) - channel_gk20a_free_prealloc_resources(c); -clean_up_sync: - if (c->sync) { - gk20a_channel_sync_destroy(c->sync, false); - c->sync = NULL; - } -clean_up_unmap: - nvgpu_big_free(g, c->gpfifo.pipe); - nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); -clean_up_usermode: - if (c->usermode_submit_enabled) { - gk20a_channel_free_usermode_buffers(c); - c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) + - c->chid * g->fifo.userd_entry_size; - c->usermode_submit_enabled = false; - } -clean_up: - memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); -clean_up_idle: - if (c->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - gk20a_idle(g); - c->deterministic = false; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - nvgpu_err(g, "fail"); - return err; -} - -void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c) -{ - if (nvgpu_mem_is_valid(&c->usermode_userd)) - 
nvgpu_dma_free(c->g, &c->usermode_userd); -} - -/* Update with this periodically to determine how the gpfifo is draining. */ -static inline u32 update_gp_get(struct gk20a *g, - struct channel_gk20a *c) -{ - u32 new_get = g->ops.fifo.userd_gp_get(g, c); - - if (new_get < c->gpfifo.get) - c->gpfifo.wrap = !c->gpfifo.wrap; - c->gpfifo.get = new_get; - return new_get; -} - -u32 nvgpu_gp_free_count(struct channel_gk20a *c) -{ - return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % - c->gpfifo.entry_num; -} - -bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, - u32 timeout_delta_ms, bool *progress) -{ - u32 gpfifo_get = update_gp_get(ch->g, ch); - - /* Count consequent timeout isr */ - if (gpfifo_get == ch->timeout_gpfifo_get) { - /* we didn't advance since previous channel timeout check */ - ch->timeout_accumulated_ms += timeout_delta_ms; - *progress = false; - } else { - /* first timeout isr encountered */ - ch->timeout_accumulated_ms = timeout_delta_ms; - *progress = true; - } - - ch->timeout_gpfifo_get = gpfifo_get; - - return nvgpu_is_timeouts_enabled(ch->g) && - ch->timeout_accumulated_ms > ch->timeout_ms_max; -} - -u32 nvgpu_get_gp_free_count(struct channel_gk20a *c) -{ - update_gp_get(c->g, c); - return nvgpu_gp_free_count(c); -} - -static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) -{ - ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch); - ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch); - ch->timeout.running = true; - nvgpu_timeout_init(ch->g, &ch->timeout.timer, - ch->timeout.limit_ms, - NVGPU_TIMER_CPU_TIMER); -} - -/** - * Start a timeout counter (watchdog) on this channel. - * - * Trigger a watchdog to recover the channel after the per-platform timeout - * duration (but strictly no earlier) if the channel hasn't advanced within - * that time. - * - * If the timeout is already running, do nothing. This should be called when - * new jobs are submitted. The timeout will stop when the last tracked job - * finishes, making the channel idle. - * - * The channel's gpfifo read pointer will be used to determine if the job has - * actually stuck at that time. After the timeout duration has expired, a - * worker thread will consider the channel stuck and recover it if stuck. - */ -static void gk20a_channel_timeout_start(struct channel_gk20a *ch) -{ - if (!nvgpu_is_timeouts_enabled(ch->g)) - return; - - if (!ch->timeout.enabled) - return; - - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - - if (ch->timeout.running) { - nvgpu_raw_spinlock_release(&ch->timeout.lock); - return; - } - __gk20a_channel_timeout_start(ch); - nvgpu_raw_spinlock_release(&ch->timeout.lock); -} - -/** - * Stop a running timeout counter (watchdog) on this channel. - * - * Make the watchdog consider the channel not running, so that it won't get - * recovered even if no progress is detected. Progress is not tracked if the - * watchdog is turned off. - * - * No guarantees are made about concurrent execution of the timeout handler. - * (This should be called from an update handler running in the same thread - * with the watchdog.) - */ -static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch) -{ - bool was_running; - - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - was_running = ch->timeout.running; - ch->timeout.running = false; - nvgpu_raw_spinlock_release(&ch->timeout.lock); - return was_running; -} - -/** - * Continue a previously stopped timeout - * - * Enable the timeout again but don't reinitialize its timer. 
- * - * No guarantees are made about concurrent execution of the timeout handler. - * (This should be called from an update handler running in the same thread - * with the watchdog.) - */ -static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) -{ - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - ch->timeout.running = true; - nvgpu_raw_spinlock_release(&ch->timeout.lock); -} - -/** - * Rewind the timeout on each non-dormant channel. - * - * Reschedule the timeout of each active channel for which timeouts are running - * as if something was happened on each channel right now. This should be - * called when a global hang is detected that could cause a false positive on - * other innocent channels. - */ -void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - if (!gk20a_channel_get(ch)) - continue; - - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - if (ch->timeout.running) - __gk20a_channel_timeout_start(ch); - nvgpu_raw_spinlock_release(&ch->timeout.lock); - - gk20a_channel_put(ch); - } -} - -/** - * Check if a timed out channel has hung and recover it if it has. - * - * Test if this channel has really got stuck at this point by checking if its - * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since - * when the watchdog was started and it's timed out, force-reset the channel. - * - * The gpu is implicitly on at this point, because the watchdog can only run on - * channels that have submitted jobs pending for cleanup. - */ -static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) -{ - struct gk20a *g = ch->g; - u32 gp_get; - u32 new_gp_get; - u64 pb_get; - u64 new_pb_get; - - nvgpu_log_fn(g, " "); - - /* Get status but keep timer running */ - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - gp_get = ch->timeout.gp_get; - pb_get = ch->timeout.pb_get; - nvgpu_raw_spinlock_release(&ch->timeout.lock); - - new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch); - new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); - - if (new_gp_get != gp_get || new_pb_get != pb_get) { - /* Channel has advanced, rewind timer */ - gk20a_channel_timeout_stop(ch); - gk20a_channel_timeout_start(ch); - return; - } - - if (!nvgpu_timeout_peek_expired(&ch->timeout.timer)) { - /* Seems stuck but waiting to time out */ - return; - } - - nvgpu_err(g, "Job on channel %d timed out", - ch->chid); - - /* force reset calls gk20a_debug_dump but not this */ - if (ch->timeout.debug_dump) - gk20a_gr_debug_dump(g); - - g->ops.fifo.force_reset_ch(ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, - ch->timeout.debug_dump); -} - -/** - * Test if the per-channel watchdog is on; check the timeout in that case. - * - * Each channel has an expiration time based watchdog. The timer is - * (re)initialized in two situations: when a new job is submitted on an idle - * channel and when the timeout is checked but progress is detected. The - * watchdog timeout limit is a coarse sliding window. - * - * The timeout is stopped (disabled) after the last job in a row finishes - * and marks the channel idle. 
- */ -static void gk20a_channel_timeout_check(struct channel_gk20a *ch) -{ - bool running; - - nvgpu_raw_spinlock_acquire(&ch->timeout.lock); - running = ch->timeout.running; - nvgpu_raw_spinlock_release(&ch->timeout.lock); - - if (running) - gk20a_channel_timeout_handler(ch); -} - -/** - * Loop every living channel, check timeouts and handle stuck channels. - */ -static void gk20a_channel_poll_timeouts(struct gk20a *g) -{ - unsigned int chid; - - - for (chid = 0; chid < g->fifo.num_channels; chid++) { - struct channel_gk20a *ch = &g->fifo.channel[chid]; - - if (gk20a_channel_get(ch)) { - gk20a_channel_timeout_check(ch); - gk20a_channel_put(ch); - } - } -} - -/* - * Process one scheduled work item for this channel. Currently, the only thing - * the worker does is job cleanup handling. - */ -static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch) -{ - nvgpu_log_fn(ch->g, " "); - - gk20a_channel_clean_up_jobs(ch, true); - - /* ref taken when enqueued */ - gk20a_channel_put(ch); -} - -/** - * Tell the worker that one more work needs to be done. - * - * Increase the work counter to synchronize the worker with the new work. Wake - * up the worker. If the worker was already running, it will handle this work - * before going to sleep. - */ -static int __gk20a_channel_worker_wakeup(struct gk20a *g) -{ - int put; - - nvgpu_log_fn(g, " "); - - /* - * Currently, the only work type is associated with a lock, which deals - * with any necessary barriers. If a work type with no locking were - * added, a nvgpu_smp_wmb() would be needed here. See - * ..worker_pending() for a pair. - */ - - put = nvgpu_atomic_inc_return(&g->channel_worker.put); - nvgpu_cond_signal_interruptible(&g->channel_worker.wq); - - return put; -} - -/** - * Test if there is some work pending. - * - * This is a pair for __gk20a_channel_worker_wakeup to be called from the - * worker. The worker has an internal work counter which is incremented once - * per finished work item. This is compared with the number of queued jobs, - * which may be channels on the items list or any other types of work. - */ -static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) -{ - bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; - - /* - * This would be the place for a nvgpu_smp_rmb() pairing - * a nvgpu_smp_wmb() for a wakeup if we had any work with - * no implicit barriers caused by locking. - */ - - return pending; -} - -/** - * Process the queued works for the worker thread serially. - * - * Flush all the work items in the queue one by one. This may block timeout - * handling for a short while, as these are serialized. - */ -static void gk20a_channel_worker_process(struct gk20a *g, int *get) -{ - - while (__gk20a_channel_worker_pending(g, *get)) { - struct channel_gk20a *ch = NULL; - - /* - * If a channel is on the list, it's guaranteed to be handled - * eventually just once. However, the opposite is not true. A - * channel may be being processed if it's on the list or not. - * - * With this, processing channel works should be conservative - * as follows: it's always safe to look at a channel found in - * the list, and if someone enqueues the channel, it will be - * handled eventually, even if it's being handled at the same - * time. A channel is on the list only once; multiple calls to - * enqueue are harmless. 
- */ - nvgpu_spinlock_acquire(&g->channel_worker.items_lock); - if (!nvgpu_list_empty(&g->channel_worker.items)) { - ch = nvgpu_list_first_entry(&g->channel_worker.items, - channel_gk20a, - worker_item); - nvgpu_list_del(&ch->worker_item); - } - nvgpu_spinlock_release(&g->channel_worker.items_lock); - - if (!ch) { - /* - * Woke up for some other reason, but there are no - * other reasons than a channel added in the items list - * currently, so warn and ack the message. - */ - nvgpu_warn(g, "Spurious worker event!"); - ++*get; - break; - } - - gk20a_channel_worker_process_ch(ch); - ++*get; - } -} - -/* - * Look at channel states periodically, until canceled. Abort timed out - * channels serially. Process all work items found in the queue. - */ -static int gk20a_channel_poll_worker(void *arg) -{ - struct gk20a *g = (struct gk20a *)arg; - struct gk20a_worker *worker = &g->channel_worker; - unsigned long watchdog_interval = 100; /* milliseconds */ - struct nvgpu_timeout timeout; - int get = 0; - - nvgpu_log_fn(g, " "); - - nvgpu_timeout_init(g, &timeout, watchdog_interval, - NVGPU_TIMER_CPU_TIMER); - while (!nvgpu_thread_should_stop(&worker->poll_task)) { - int ret; - - ret = NVGPU_COND_WAIT_INTERRUPTIBLE( - &worker->wq, - __gk20a_channel_worker_pending(g, get), - watchdog_interval); - - if (ret == 0) - gk20a_channel_worker_process(g, &get); - - if (nvgpu_timeout_peek_expired(&timeout)) { - gk20a_channel_poll_timeouts(g); - nvgpu_timeout_init(g, &timeout, watchdog_interval, - NVGPU_TIMER_CPU_TIMER); - } - } - return 0; -} - -static int __nvgpu_channel_worker_start(struct gk20a *g) -{ - char thread_name[64]; - int err = 0; - - if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) - return err; - - nvgpu_mutex_acquire(&g->channel_worker.start_lock); - - /* - * We don't want to grab a mutex on every channel update so we check - * again if the worker has been initialized before creating a new thread - */ - - /* - * Mutexes have implicit barriers, so there is no risk of a thread - * having a stale copy of the poll_task variable as the call to - * thread_is_running is volatile - */ - - if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) { - nvgpu_mutex_release(&g->channel_worker.start_lock); - return err; - } - - snprintf(thread_name, sizeof(thread_name), - "nvgpu_channel_poll_%s", g->name); - - err = nvgpu_thread_create(&g->channel_worker.poll_task, g, - gk20a_channel_poll_worker, thread_name); - - nvgpu_mutex_release(&g->channel_worker.start_lock); - return err; -} -/** - * Initialize the channel worker's metadata and start the background thread. - */ -int nvgpu_channel_worker_init(struct gk20a *g) -{ - int err; - - nvgpu_atomic_set(&g->channel_worker.put, 0); - nvgpu_cond_init(&g->channel_worker.wq); - nvgpu_init_list_node(&g->channel_worker.items); - nvgpu_spinlock_init(&g->channel_worker.items_lock); - err = nvgpu_mutex_init(&g->channel_worker.start_lock); - if (err) - goto error_check; - - err = __nvgpu_channel_worker_start(g); -error_check: - if (err) { - nvgpu_err(g, "failed to start channel poller thread"); - return err; - } - return 0; -} - -void nvgpu_channel_worker_deinit(struct gk20a *g) -{ - nvgpu_mutex_acquire(&g->channel_worker.start_lock); - nvgpu_thread_stop(&g->channel_worker.poll_task); - nvgpu_mutex_release(&g->channel_worker.start_lock); -} - -/** - * Append a channel to the worker's list, if not there already. - * - * The worker thread processes work items (channels in its work list) and polls - * for other things. 
This adds @ch to the end of the list and wakes the worker - * up immediately. If the channel already existed in the list, it's not added, - * because in that case it has been scheduled already but has not yet been - * processed. - */ -static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch) -{ - struct gk20a *g = ch->g; - - nvgpu_log_fn(g, " "); - - /* - * Warn if worker thread cannot run - */ - if (WARN_ON(__nvgpu_channel_worker_start(g))) { - nvgpu_warn(g, "channel worker cannot run!"); - return; - } - - /* - * Ref released when this item gets processed. The caller should hold - * one ref already, so normally shouldn't fail, but the channel could - * end up being freed between the time the caller got its reference and - * the time we end up here (e.g., if the client got killed); if so, just - * return. - */ - if (!gk20a_channel_get(ch)) { - nvgpu_info(g, "cannot get ch ref for worker!"); - return; - } - - nvgpu_spinlock_acquire(&g->channel_worker.items_lock); - if (!nvgpu_list_empty(&ch->worker_item)) { - /* - * Already queued, so will get processed eventually. - * The worker is probably awake already. - */ - nvgpu_spinlock_release(&g->channel_worker.items_lock); - gk20a_channel_put(ch); - return; - } - nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items); - nvgpu_spinlock_release(&g->channel_worker.items_lock); - - __gk20a_channel_worker_wakeup(g); -} - -int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) -{ - struct priv_cmd_queue *q = &c->priv_cmd_q; - struct gk20a *g = c->g; - - if (!e) - return 0; - - if (e->valid) { - /* read the entry's valid flag before reading its contents */ - nvgpu_smp_rmb(); - if ((q->get != e->off) && e->off != 0) - nvgpu_err(g, "requests out-of-order, ch=%d", - c->chid); - q->get = e->off + e->size; - } - - free_priv_cmdbuf(c, e); - - return 0; -} - -int gk20a_channel_add_job(struct channel_gk20a *c, - struct channel_gk20a_job *job, - bool skip_buffer_refcounting) -{ - struct vm_gk20a *vm = c->vm; - struct nvgpu_mapped_buf **mapped_buffers = NULL; - int err = 0, num_mapped_buffers = 0; - bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); - - if (!skip_buffer_refcounting) { - err = nvgpu_vm_get_buffers(vm, &mapped_buffers, - &num_mapped_buffers); - if (err) - return err; - } - - /* - * Ref to hold the channel open during the job lifetime. This is - * released by job cleanup launched via syncpt or sema interrupt. - */ - c = gk20a_channel_get(c); - - if (c) { - job->num_mapped_buffers = num_mapped_buffers; - job->mapped_buffers = mapped_buffers; - - gk20a_channel_timeout_start(c); - - if (!pre_alloc_enabled) - channel_gk20a_joblist_lock(c); - - /* - * ensure all pending write complete before adding to the list. - * see corresponding nvgpu_smp_rmb in - * gk20a_channel_clean_up_jobs() - */ - nvgpu_smp_wmb(); - channel_gk20a_joblist_add(c, job); - - if (!pre_alloc_enabled) - channel_gk20a_joblist_unlock(c); - } else { - err = -ETIMEDOUT; - goto err_put_buffers; - } - - return 0; - -err_put_buffers: - nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); - - return err; -} - -/** - * Clean up job resources for further jobs to use. - * @clean_all: If true, process as many jobs as possible, otherwise just one. - * - * Loop all jobs from the joblist until a pending job is found, or just one if - * clean_all is not set. Pending jobs are detected from the job's post fence, - * so this is only done for jobs that have job tracking resources. 
Free all - * per-job memory for completed jobs; in case of preallocated resources, this - * opens up slots for new jobs to be submitted. - */ -void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, - bool clean_all) -{ - struct vm_gk20a *vm; - struct channel_gk20a_job *job; - struct gk20a *g; - int job_finished = 0; - bool watchdog_on = false; - - c = gk20a_channel_get(c); - if (!c) - return; - - if (!c->g->power_on) { /* shutdown case */ - gk20a_channel_put(c); - return; - } - - vm = c->vm; - g = c->g; - - /* - * If !clean_all, we're in a condition where watchdog isn't supported - * anyway (this would be a no-op). - */ - if (clean_all) - watchdog_on = gk20a_channel_timeout_stop(c); - - /* Synchronize with abort cleanup that needs the jobs. */ - nvgpu_mutex_acquire(&c->joblist.cleanup_lock); - - while (1) { - bool completed; - - channel_gk20a_joblist_lock(c); - if (channel_gk20a_joblist_is_empty(c)) { - /* - * No jobs in flight, timeout will remain stopped until - * new jobs are submitted. - */ - channel_gk20a_joblist_unlock(c); - break; - } - - /* - * ensure that all subsequent reads occur after checking - * that we have a valid node. see corresponding nvgpu_smp_wmb in - * gk20a_channel_add_job(). - */ - nvgpu_smp_rmb(); - job = channel_gk20a_joblist_peek(c); - channel_gk20a_joblist_unlock(c); - - completed = gk20a_fence_is_expired(job->post_fence); - if (!completed) { - /* - * The watchdog eventually sees an updated gp_get if - * something happened in this loop. A new job can have - * been submitted between the above call to stop and - * this - in that case, this is a no-op and the new - * later timeout is still used. - */ - if (clean_all && watchdog_on) - gk20a_channel_timeout_continue(c); - break; - } - - WARN_ON(!c->sync); - - if (c->sync) { - if (c->has_os_fence_framework_support && - g->os_channel.os_fence_framework_inst_exists(c)) - g->os_channel.signal_os_fence_framework(c); - - if (g->aggressive_sync_destroy_thresh) { - nvgpu_mutex_acquire(&c->sync_lock); - if (nvgpu_atomic_dec_and_test( - &c->sync->refcount) && - g->aggressive_sync_destroy) { - gk20a_channel_sync_destroy(c->sync, - false); - c->sync = NULL; - } - nvgpu_mutex_release(&c->sync_lock); - } - } - - if (job->num_mapped_buffers) - nvgpu_vm_put_buffers(vm, job->mapped_buffers, - job->num_mapped_buffers); - - /* Remove job from channel's job list before we close the - * fences, to prevent other callers (gk20a_channel_abort) from - * trying to dereference post_fence when it no longer exists. - */ - channel_gk20a_joblist_lock(c); - channel_gk20a_joblist_delete(c, job); - channel_gk20a_joblist_unlock(c); - - /* Close the fence (this will unref the semaphore and release - * it to the pool). */ - gk20a_fence_put(job->post_fence); - - /* Free the private command buffers (wait_cmd first and - * then incr_cmd i.e. order of allocation) */ - gk20a_free_priv_cmdbuf(c, job->wait_cmd); - gk20a_free_priv_cmdbuf(c, job->incr_cmd); - - /* another bookkeeping taken in add_job. caller must hold a ref - * so this wouldn't get freed here. */ - gk20a_channel_put(c); - - /* - * ensure all pending writes complete before freeing up the job. - * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). - */ - nvgpu_smp_wmb(); - - channel_gk20a_free_job(c, job); - job_finished = 1; - - /* - * Deterministic channels have a channel-wide power reference; - * for others, there's one per submit. - */ - if (!c->deterministic) - gk20a_idle(g); - - if (!clean_all) { - /* Timeout isn't supported here so don't touch it. 
*/ - break; - } - } - - nvgpu_mutex_release(&c->joblist.cleanup_lock); - - if (job_finished && g->os_channel.work_completion_signal) - g->os_channel.work_completion_signal(c); - - gk20a_channel_put(c); -} - -/** - * Schedule a job cleanup work on this channel to free resources and to signal - * about completion. - * - * Call this when there has been an interrupt about finished jobs, or when job - * cleanup needs to be performed, e.g., when closing a channel. This is always - * safe to call even if there is nothing to clean up. Any visible actions on - * jobs just before calling this are guaranteed to be processed. - */ -void gk20a_channel_update(struct channel_gk20a *c) -{ - if (!c->g->power_on) { /* shutdown case */ - return; - } - - trace_gk20a_channel_update(c->chid); - /* A queued channel is always checked for job cleanup. */ - gk20a_channel_worker_enqueue(c); -} - -/* - * Stop deterministic channel activity for do_idle() when power needs to go off - * momentarily but deterministic channels keep power refs for potentially a - * long time. - * - * Takes write access on g->deterministic_busy. - * - * Must be paired with gk20a_channel_deterministic_unidle(). - */ -void gk20a_channel_deterministic_idle(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - - /* Grab exclusive access to the hw to block new submits */ - nvgpu_rwsem_down_write(&g->deterministic_busy); - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - if (!gk20a_channel_get(ch)) - continue; - - if (ch->deterministic && !ch->deterministic_railgate_allowed) { - /* - * Drop the power ref taken when setting deterministic - * flag. deterministic_unidle will put this and the - * channel ref back. If railgate is allowed separately - * for this channel, the power ref has already been put - * away. - * - * Hold the channel ref: it must not get freed in - * between. A race could otherwise result in lost - * gk20a_busy() via unidle, and in unbalanced - * gk20a_idle() via closing the channel. - */ - gk20a_idle(g); - } else { - /* Not interesting, carry on. */ - gk20a_channel_put(ch); - } - } -} - -/* - * Allow deterministic channel activity again for do_unidle(). - * - * This releases write access on g->deterministic_busy. - */ -void gk20a_channel_deterministic_unidle(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - if (!gk20a_channel_get(ch)) - continue; - - /* - * Deterministic state changes inside deterministic_busy lock, - * which we took in deterministic_idle. 
- */ - if (ch->deterministic && !ch->deterministic_railgate_allowed) { - if (gk20a_busy(g)) - nvgpu_err(g, "cannot busy() again!"); - /* Took this in idle() */ - gk20a_channel_put(ch); - } - - gk20a_channel_put(ch); - } - - /* Release submits, new deterministic channels and frees */ - nvgpu_rwsem_up_write(&g->deterministic_busy); -} - -int gk20a_init_channel_support(struct gk20a *g, u32 chid) -{ - struct channel_gk20a *c = g->fifo.channel+chid; - int err; - - c->g = NULL; - c->chid = chid; - nvgpu_atomic_set(&c->bound, false); - nvgpu_spinlock_init(&c->ref_obtain_lock); - nvgpu_atomic_set(&c->ref_count, 0); - c->referenceable = false; - nvgpu_cond_init(&c->ref_count_dec_wq); - -#if GK20A_CHANNEL_REFCOUNT_TRACKING - nvgpu_spinlock_init(&c->ref_actions_lock); -#endif - nvgpu_spinlock_init(&c->joblist.dynamic.lock); - nvgpu_raw_spinlock_init(&c->timeout.lock); - - nvgpu_init_list_node(&c->joblist.dynamic.jobs); - nvgpu_init_list_node(&c->dbg_s_list); - nvgpu_init_list_node(&c->worker_item); - - err = nvgpu_mutex_init(&c->ioctl_lock); - if (err) - return err; - err = nvgpu_mutex_init(&c->joblist.cleanup_lock); - if (err) - goto fail_1; - err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); - if (err) - goto fail_2; - err = nvgpu_mutex_init(&c->sync_lock); - if (err) - goto fail_3; -#if defined(CONFIG_GK20A_CYCLE_STATS) - err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); - if (err) - goto fail_4; - err = nvgpu_mutex_init(&c->cs_client_mutex); - if (err) - goto fail_5; -#endif - err = nvgpu_mutex_init(&c->dbg_s_lock); - if (err) - goto fail_6; - - nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); - - return 0; - -fail_6: -#if defined(CONFIG_GK20A_CYCLE_STATS) - nvgpu_mutex_destroy(&c->cs_client_mutex); -fail_5: - nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); -fail_4: -#endif - nvgpu_mutex_destroy(&c->sync_lock); -fail_3: - nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); -fail_2: - nvgpu_mutex_destroy(&c->joblist.cleanup_lock); -fail_1: - nvgpu_mutex_destroy(&c->ioctl_lock); - - return err; -} - -/* in this context the "channel" is the host1x channel which - * maps to *all* gk20a channels */ -int gk20a_channel_suspend(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - bool channels_in_use = false; - u32 active_runlist_ids = 0; - - nvgpu_log_fn(g, " "); - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (gk20a_channel_get(ch)) { - nvgpu_log_info(g, "suspend channel %d", chid); - /* disable channel */ - gk20a_disable_channel_tsg(g, ch); - /* preempt the channel */ - gk20a_fifo_preempt(g, ch); - /* wait for channel update notifiers */ - if (g->os_channel.work_completion_cancel_sync) - g->os_channel.work_completion_cancel_sync(ch); - - channels_in_use = true; - - active_runlist_ids |= BIT(ch->runlist_id); - - gk20a_channel_put(ch); - } - } - - if (channels_in_use) { - gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true); - - for (chid = 0; chid < f->num_channels; chid++) { - if (gk20a_channel_get(&f->channel[chid])) { - g->ops.fifo.unbind_channel(&f->channel[chid]); - gk20a_channel_put(&f->channel[chid]); - } - } - } - - nvgpu_log_fn(g, "done"); - return 0; -} - -int gk20a_channel_resume(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - bool channels_in_use = false; - u32 active_runlist_ids = 0; - - nvgpu_log_fn(g, " "); - - for (chid = 0; chid < f->num_channels; chid++) { - if (gk20a_channel_get(&f->channel[chid])) { - nvgpu_log_info(g, "resume channel 
%d", chid); - g->ops.fifo.bind_channel(&f->channel[chid]); - channels_in_use = true; - active_runlist_ids |= BIT(f->channel[chid].runlist_id); - gk20a_channel_put(&f->channel[chid]); - } - } - - if (channels_in_use) - gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true); - - nvgpu_log_fn(g, "done"); - return 0; -} - -void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - - nvgpu_log_fn(g, " "); - - /* - * Ensure that all pending writes are actually done before trying to - * read semaphore values from DRAM. - */ - g->ops.mm.fb_flush(g); - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *c = g->fifo.channel+chid; - if (gk20a_channel_get(c)) { - if (nvgpu_atomic_read(&c->bound)) { - nvgpu_cond_broadcast_interruptible( - &c->semaphore_wq); - if (post_events) { - if (gk20a_is_channel_marked_as_tsg(c)) { - struct tsg_gk20a *tsg = - &g->fifo.tsg[c->tsgid]; - - g->ops.fifo.post_event_id(tsg, - NVGPU_EVENT_ID_BLOCKING_SYNC); - } - } - /* - * Only non-deterministic channels get the - * channel_update callback. We don't allow - * semaphore-backed syncs for these channels - * anyways, since they have a dependency on - * the sync framework. - * If deterministic channels are receiving a - * semaphore wakeup, it must be for a - * user-space managed - * semaphore. - */ - if (!c->deterministic) - gk20a_channel_update(c); - } - gk20a_channel_put(c); - } - } -} diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c deleted file mode 100644 index fef44f2b..00000000 --- a/drivers/gpu/nvgpu/os/linux/channel.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -/* - * This is required for nvgpu_vm_find_buf() which is used in the tracing - * code. Once we can get and access userspace buffers without requiring - * direct dma_buf usage this can be removed. 
- */ -#include - -#include "gk20a/gk20a.h" - -#include "channel.h" -#include "ioctl_channel.h" -#include "os_linux.h" - -#include - -#include -#include -#include -#include - -#include "sync_sema_android.h" - -u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) - flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) - flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) - flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; - - return flags; -} - -/* - * API to convert error_notifiers in common code and of the form - * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user - * space and of the form NVGPU_CHANNEL_* - */ -static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) -{ - switch (error_notifier) { - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: - return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: - return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: - return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; - case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: - return NVGPU_CHANNEL_GR_EXCEPTION; - case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: - return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: - return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: - return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; - case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: - return NVGPU_CHANNEL_PBDMA_ERROR; - case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: - return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; - case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: - return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; - case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: - return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; - } - - pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); - - return error_notifier; -} - -/** - * nvgpu_set_error_notifier_locked() - * Should be called with ch->error_notifier_mutex held - * - * error should be of the form NVGPU_ERR_NOTIFIER_* - */ -void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - error = nvgpu_error_notifier_to_channel_notifier(error); - - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - struct timespec time_data; - u64 nsec; - - getnstimeofday(&time_data); - nsec = ((u64)time_data.tv_sec) * 1000000000u + - (u64)time_data.tv_nsec; - notification->time_stamp.nanoseconds[0] = - (u32)nsec; - notification->time_stamp.nanoseconds[1] = - (u32)(nsec >> 32); - notification->info32 = error; - notification->status = 0xffff; - - nvgpu_err(ch->g, - "error notifier set to %d for ch %d", error, ch->chid); - } -} - -/* error should be of the form NVGPU_ERR_NOTIFIER_* */ -void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - 
nvgpu_set_error_notifier_locked(ch, error); - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - - /* Don't overwrite error flag if it is already set */ - if (notification->status != 0xffff) - nvgpu_set_error_notifier_locked(ch, error); - } - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ -bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - bool notifier_set = false; - - error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - u32 err = notification->info32; - - if (err == error_notifier) - notifier_set = true; - } - nvgpu_mutex_release(&priv->error_notifier.mutex); - - return notifier_set; -} - -static void gk20a_channel_update_runcb_fn(struct work_struct *work) -{ - struct nvgpu_channel_completion_cb *completion_cb = - container_of(work, struct nvgpu_channel_completion_cb, work); - struct nvgpu_channel_linux *priv = - container_of(completion_cb, - struct nvgpu_channel_linux, completion_cb); - struct channel_gk20a *ch = priv->ch; - void (*fn)(struct channel_gk20a *, void *); - void *user_data; - - nvgpu_spinlock_acquire(&completion_cb->lock); - fn = completion_cb->fn; - user_data = completion_cb->user_data; - nvgpu_spinlock_release(&completion_cb->lock); - - if (fn) - fn(ch, user_data); -} - -static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - nvgpu_spinlock_init(&priv->completion_cb.lock); - INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); -} - -static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - nvgpu_spinlock_release(&priv->completion_cb.lock); - cancel_work_sync(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - schedule_work(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - cancel_work_sync(&priv->completion_cb.work); -} - -struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, - void (*update_fn)(struct channel_gk20a *, void *), - void *update_fn_data, - int runlist_id, - bool is_privileged_channel) -{ - struct channel_gk20a *ch; - struct nvgpu_channel_linux *priv; - - ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, - nvgpu_current_pid(g), nvgpu_current_tid(g)); - - if (ch) { - priv = ch->os_priv; - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = update_fn; - priv->completion_cb.user_data = update_fn_data; - 
nvgpu_spinlock_release(&priv->completion_cb.lock); - } - - return ch; -} - -static void nvgpu_channel_open_linux(struct channel_gk20a *ch) -{ -} - -static void nvgpu_channel_close_linux(struct channel_gk20a *ch) -{ - nvgpu_channel_work_completion_clear(ch); - -#if defined(CONFIG_GK20A_CYCLE_STATS) - gk20a_channel_free_cycle_stats_buffer(ch); - gk20a_channel_free_cycle_stats_snapshot(ch); -#endif -} - -static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv; - int err; - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) - return -ENOMEM; - - ch->os_priv = priv; - priv->ch = ch; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = true; -#endif - - err = nvgpu_mutex_init(&priv->error_notifier.mutex); - if (err) { - nvgpu_kfree(g, priv); - return err; - } - - nvgpu_channel_work_completion_init(ch); - - return 0; -} - -static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_destroy(&priv->error_notifier.mutex); - nvgpu_kfree(g, priv); - - ch->os_priv = NULL; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = false; -#endif -} - -static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, - const char *fmt, ...) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - char name[30]; - va_list args; - - fence_framework = &priv->fence_framework; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); - - fence_framework->timeline = gk20a_sync_timeline_create(name); - - if (!fence_framework->timeline) - return -EINVAL; - - return 0; -} -static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_signal(fence_framework->timeline); -} - -static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_destroy(fence_framework->timeline); - fence_framework->timeline = NULL; -} - -static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - return (fence_framework->timeline != NULL); -} - -static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest, - struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length) -{ - struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries; - unsigned long n; - - n = copy_from_user(dest, user_gpfifo + start, - length * sizeof(struct nvgpu_gpfifo_entry)); - - return n == 0 ? 
0 : -EFAULT; -} - -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - int chid; - int err; - - for (chid = 0; chid < (int)f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - err = nvgpu_channel_alloc_linux(g, ch); - if (err) - goto err_clean; - } - - g->os_channel.open = nvgpu_channel_open_linux; - g->os_channel.close = nvgpu_channel_close_linux; - g->os_channel.work_completion_signal = - nvgpu_channel_work_completion_signal; - g->os_channel.work_completion_cancel_sync = - nvgpu_channel_work_completion_cancel_sync; - - g->os_channel.os_fence_framework_inst_exists = - nvgpu_channel_fence_framework_exists; - g->os_channel.init_os_fence_framework = - nvgpu_channel_init_os_fence_framework; - g->os_channel.signal_os_fence_framework = - nvgpu_channel_signal_os_fence_framework; - g->os_channel.destroy_os_fence_framework = - nvgpu_channel_destroy_os_fence_framework; - - g->os_channel.copy_user_gpfifo = - nvgpu_channel_copy_user_gpfifo; - - return 0; - -err_clean: - for (; chid >= 0; chid--) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - return err; -} - -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - unsigned int chid; - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - - g->os_channel.os_fence_framework_inst_exists = NULL; - g->os_channel.init_os_fence_framework = NULL; - g->os_channel.signal_os_fence_framework = NULL; - g->os_channel.destroy_os_fence_framework = NULL; -} - -u32 nvgpu_get_gpfifo_entry_size(void) -{ - return sizeof(struct nvgpu_gpfifo_entry); -} - -#ifdef CONFIG_DEBUG_FS -static void trace_write_pushbuffer(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *g) -{ - void *mem = NULL; - unsigned int words; - u64 offset; - struct dma_buf *dmabuf = NULL; - - if (gk20a_debug_trace_cmdbuf) { - u64 gpu_va = (u64)g->entry0 | - (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); - int err; - - words = pbdma_gp_entry1_length_v(g->entry1); - err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); - if (!err) - mem = dma_buf_vmap(dmabuf); - } - - if (mem) { - u32 i; - /* - * Write in batches of 128 as there seems to be a limit - * of how much you can output to ftrace at once. - */ - for (i = 0; i < words; i += 128U) { - trace_gk20a_push_cmdbuf( - c->g->name, - 0, - min(words - i, 128U), - offset + i * sizeof(u32), - mem); - } - dma_buf_vunmap(dmabuf, mem); - } -} - -void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) -{ - struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; - u32 n = c->gpfifo.entry_num; - u32 start = c->gpfifo.put; - u32 i; - - if (!gk20a_debug_trace_cmdbuf) - return; - - if (!gp) - return; - - for (i = 0; i < count; i++) - trace_write_pushbuffer(c, &gp[(start + i) % n]); -} -#endif diff --git a/drivers/gpu/nvgpu/os/linux/linux-channel.c b/drivers/gpu/nvgpu/os/linux/linux-channel.c new file mode 100644 index 00000000..fef44f2b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/linux-channel.c @@ -0,0 +1,508 @@ +/* + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +/* + * This is required for nvgpu_vm_find_buf() which is used in the tracing + * code. Once we can get and access userspace buffers without requiring + * direct dma_buf usage this can be removed. + */ +#include + +#include "gk20a/gk20a.h" + +#include "channel.h" +#include "ioctl_channel.h" +#include "os_linux.h" + +#include + +#include +#include +#include +#include + +#include "sync_sema_android.h" + +u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) + flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) + flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) + flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) + flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; + + return flags; +} + +/* + * API to convert error_notifiers in common code and of the form + * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user + * space and of the form NVGPU_CHANNEL_* + */ +static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) +{ + switch (error_notifier) { + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: + return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: + return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: + return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; + case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: + return NVGPU_CHANNEL_GR_EXCEPTION; + case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: + return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: + return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: + return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; + case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: + return NVGPU_CHANNEL_PBDMA_ERROR; + case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: + return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; + case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: + return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; + case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: + return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + } + + pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); + + return error_notifier; +} + +/** + * nvgpu_set_error_notifier_locked() + * Should be called with ch->error_notifier_mutex held + * + * error should be of the form NVGPU_ERR_NOTIFIER_* + */ +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + error = nvgpu_error_notifier_to_channel_notifier(error); + + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + struct timespec time_data; + u64 nsec; + + getnstimeofday(&time_data); 
+ nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + notification->time_stamp.nanoseconds[0] = + (u32)nsec; + notification->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + notification->info32 = error; + notification->status = 0xffff; + + nvgpu_err(ch->g, + "error notifier set to %d for ch %d", error, ch->chid); + } +} + +/* error should be of the form NVGPU_ERR_NOTIFIER_* */ +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + nvgpu_set_error_notifier_locked(ch, error); + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + + /* Don't overwrite error flag if it is already set */ + if (notification->status != 0xffff) + nvgpu_set_error_notifier_locked(ch, error); + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + bool notifier_set = false; + + error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + u32 err = notification->info32; + + if (err == error_notifier) + notifier_set = true; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return notifier_set; +} + +static void gk20a_channel_update_runcb_fn(struct work_struct *work) +{ + struct nvgpu_channel_completion_cb *completion_cb = + container_of(work, struct nvgpu_channel_completion_cb, work); + struct nvgpu_channel_linux *priv = + container_of(completion_cb, + struct nvgpu_channel_linux, completion_cb); + struct channel_gk20a *ch = priv->ch; + void (*fn)(struct channel_gk20a *, void *); + void *user_data; + + nvgpu_spinlock_acquire(&completion_cb->lock); + fn = completion_cb->fn; + user_data = completion_cb->user_data; + nvgpu_spinlock_release(&completion_cb->lock); + + if (fn) + fn(ch, user_data); +} + +static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + nvgpu_spinlock_init(&priv->completion_cb.lock); + INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); +} + +static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + nvgpu_spinlock_release(&priv->completion_cb.lock); + cancel_work_sync(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + schedule_work(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + 
cancel_work_sync(&priv->completion_cb.work); +} + +struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, + void (*update_fn)(struct channel_gk20a *, void *), + void *update_fn_data, + int runlist_id, + bool is_privileged_channel) +{ + struct channel_gk20a *ch; + struct nvgpu_channel_linux *priv; + + ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + + if (ch) { + priv = ch->os_priv; + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = update_fn; + priv->completion_cb.user_data = update_fn_data; + nvgpu_spinlock_release(&priv->completion_cb.lock); + } + + return ch; +} + +static void nvgpu_channel_open_linux(struct channel_gk20a *ch) +{ +} + +static void nvgpu_channel_close_linux(struct channel_gk20a *ch) +{ + nvgpu_channel_work_completion_clear(ch); + +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_channel_free_cycle_stats_buffer(ch); + gk20a_channel_free_cycle_stats_snapshot(ch); +#endif +} + +static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv; + int err; + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) + return -ENOMEM; + + ch->os_priv = priv; + priv->ch = ch; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = true; +#endif + + err = nvgpu_mutex_init(&priv->error_notifier.mutex); + if (err) { + nvgpu_kfree(g, priv); + return err; + } + + nvgpu_channel_work_completion_init(ch); + + return 0; +} + +static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_destroy(&priv->error_notifier.mutex); + nvgpu_kfree(g, priv); + + ch->os_priv = NULL; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = false; +#endif +} + +static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, + const char *fmt, ...) 
+{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + char name[30]; + va_list args; + + fence_framework = &priv->fence_framework; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence_framework->timeline = gk20a_sync_timeline_create(name); + + if (!fence_framework->timeline) + return -EINVAL; + + return 0; +} +static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_signal(fence_framework->timeline); +} + +static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_destroy(fence_framework->timeline); + fence_framework->timeline = NULL; +} + +static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + return (fence_framework->timeline != NULL); +} + +static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest, + struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length) +{ + struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries; + unsigned long n; + + n = copy_from_user(dest, user_gpfifo + start, + length * sizeof(struct nvgpu_gpfifo_entry)); + + return n == 0 ? 0 : -EFAULT; +} + +int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + int chid; + int err; + + for (chid = 0; chid < (int)f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + err = nvgpu_channel_alloc_linux(g, ch); + if (err) + goto err_clean; + } + + g->os_channel.open = nvgpu_channel_open_linux; + g->os_channel.close = nvgpu_channel_close_linux; + g->os_channel.work_completion_signal = + nvgpu_channel_work_completion_signal; + g->os_channel.work_completion_cancel_sync = + nvgpu_channel_work_completion_cancel_sync; + + g->os_channel.os_fence_framework_inst_exists = + nvgpu_channel_fence_framework_exists; + g->os_channel.init_os_fence_framework = + nvgpu_channel_init_os_fence_framework; + g->os_channel.signal_os_fence_framework = + nvgpu_channel_signal_os_fence_framework; + g->os_channel.destroy_os_fence_framework = + nvgpu_channel_destroy_os_fence_framework; + + g->os_channel.copy_user_gpfifo = + nvgpu_channel_copy_user_gpfifo; + + return 0; + +err_clean: + for (; chid >= 0; chid--) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + return err; +} + +void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + unsigned int chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + + g->os_channel.os_fence_framework_inst_exists = NULL; + g->os_channel.init_os_fence_framework = NULL; + g->os_channel.signal_os_fence_framework = NULL; + g->os_channel.destroy_os_fence_framework = NULL; +} + +u32 nvgpu_get_gpfifo_entry_size(void) +{ + return sizeof(struct nvgpu_gpfifo_entry); +} + +#ifdef CONFIG_DEBUG_FS +static void trace_write_pushbuffer(struct channel_gk20a *c, 
+ struct nvgpu_gpfifo_entry *g) +{ + void *mem = NULL; + unsigned int words; + u64 offset; + struct dma_buf *dmabuf = NULL; + + if (gk20a_debug_trace_cmdbuf) { + u64 gpu_va = (u64)g->entry0 | + (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); + int err; + + words = pbdma_gp_entry1_length_v(g->entry1); + err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); + if (!err) + mem = dma_buf_vmap(dmabuf); + } + + if (mem) { + u32 i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. + */ + for (i = 0; i < words; i += 128U) { + trace_gk20a_push_cmdbuf( + c->g->name, + 0, + min(words - i, 128U), + offset + i * sizeof(u32), + mem); + } + dma_buf_vunmap(dmabuf, mem); + } +} + +void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) +{ + struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; + u32 n = c->gpfifo.entry_num; + u32 start = c->gpfifo.put; + u32 i; + + if (!gk20a_debug_trace_cmdbuf) + return; + + if (!gp) + return; + + for (i = 0; i < count; i++) + trace_write_pushbuffer(c, &gp[(start + i) % n]); +} +#endif diff --git a/drivers/gpu/nvgpu/os/posix/channel.c b/drivers/gpu/nvgpu/os/posix/channel.c deleted file mode 100644 index 05697159..00000000 --- a/drivers/gpu/nvgpu/os/posix/channel.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "gk20a/channel_gk20a.h" - -u32 nvgpu_get_gpfifo_entry_size(void) -{ - /* - * There is no struct nvgpu_gpfifo for us to use yet. But when it's - * defined in userspace this is how big it will be. - */ - return 8; -} diff --git a/drivers/gpu/nvgpu/os/posix/posix-channel.c b/drivers/gpu/nvgpu/os/posix/posix-channel.c new file mode 100644 index 00000000..05697159 --- /dev/null +++ b/drivers/gpu/nvgpu/os/posix/posix-channel.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gk20a/channel_gk20a.h" + +u32 nvgpu_get_gpfifo_entry_size(void) +{ + /* + * There is no struct nvgpu_gpfifo for us to use yet. But when it's + * defined in userspace this is how big it will be. + */ + return 8; +} -- cgit v1.2.2