/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* need for nvmap.h*/
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/poll.h>
#include <linux/uaccess.h>

#include <trace/events/gk20a.h>

#include "debug_gk20a.h"

#include "gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"
#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"

#define NVMAP_HANDLE_PARAM_SIZE 1

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);
static void recycle_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

/* allocate GPU channel */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				free_chs);
		list_del(&ch->free_chs);
		WARN_ON(atomic_read(&ch->ref_count));
		WARN_ON(ch->referenceable);
	}
	mutex_unlock(&f->free_chs_mutex);

	return ch;
}

static void free_channel(struct fifo_gk20a *f,
		struct channel_gk20a *ch)
{
	trace_gk20a_release_used_channel(ch->hw_chid);
	/* refcount is zero here and channel is in a freed/dead state */
	mutex_lock(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	list_add(&ch->free_chs, &f->free_chs);
	mutex_unlock(&f->free_chs_mutex);
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	if (!c->inst_block.cpu_va)
		return -ENOMEM;

	gk20a_init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[gmmu_page_size_big]);

	return 0;
}

static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	return 0;
}

int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
		int timeslice_period,
		int *__timeslice_timeout, int
*__timeslice_scale) { struct gk20a_platform *platform = platform_get_drvdata(g->dev); int value = scale_ptimer(timeslice_period, platform->ptimerscaling10x); int shift = 3; /* value field is 8 bits long */ while (value >= 1 << 8) { value >>= 1; shift++; } /* time slice register is only 18bits long */ if ((value << shift) >= 1<<19) { pr_err("Requested timeslice value is clamped to 18 bits\n"); value = 255; shift = 10; } *__timeslice_timeout = value; *__timeslice_scale = shift; return 0; } static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, u32 timeslice_period) { void *inst_ptr; int shift = 0, value = 0; inst_ptr = c->inst_block.cpu_va; if (!inst_ptr) return -ENOMEM; gk20a_channel_get_timescale_from_timeslice(c->g, timeslice_period, &value, &shift); /* disable channel */ c->g->ops.fifo.disable_channel(c); /* preempt the channel */ WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); /* set new timeslice */ gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), value | (shift << 12) | fifo_runlist_timeslice_enable_true_f()); /* enable channel */ gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | ccsr_channel_enable_set_true_f()); return 0; } int channel_gk20a_setup_ramfc(struct channel_gk20a *c, u64 gpfifo_base, u32 gpfifo_entries, u32 flags) { void *inst_ptr; gk20a_dbg_fn(""); inst_ptr = c->inst_block.cpu_va; if (!inst_ptr) return -ENOMEM; memset(inst_ptr, 0, ram_fc_size_val_v()); gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), pbdma_gp_base_offset_f( u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), c->g->ops.fifo.get_pbdma_signature(c->g)); gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), pbdma_formats_gp_fermi0_f() | pbdma_formats_pb_fermi1_f() | pbdma_formats_mp_fermi0_f()); gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), pbdma_pb_header_priv_user_f() | pbdma_pb_header_method_zero_f() | pbdma_pb_header_subchannel_zero_f() | pbdma_pb_header_level_main_f() | pbdma_pb_header_first_true_f() | pbdma_pb_header_type_inc_f()); gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), pbdma_subdevice_id_f(1) | pbdma_subdevice_status_active_f() | pbdma_subdevice_channel_dma_enable_f()); gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), pbdma_acquire_retry_man_2_f() | pbdma_acquire_retry_exp_2_f() | pbdma_acquire_timeout_exp_max_f() | pbdma_acquire_timeout_man_max_f() | pbdma_acquire_timeout_en_disable_f()); gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), fifo_runlist_timeslice_timeout_128_f() | fifo_runlist_timeslice_timescale_3_f() | fifo_runlist_timeslice_enable_true_f()); gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), fifo_pb_timeslice_timeout_16_f() | fifo_pb_timeslice_timescale_0_f() | fifo_pb_timeslice_enable_true_f()); gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); return channel_gk20a_commit_userd(c); } static int channel_gk20a_setup_userd(struct channel_gk20a *c) { BUG_ON(!c->userd_cpu_va); gk20a_dbg_fn(""); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, 
ram_userd_gp_top_level_get_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); return 0; } static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a) { struct gk20a *g = ch_gk20a->g; struct fifo_gk20a *f = &g->fifo; struct fifo_engine_info_gk20a *engine_info = f->engine_info + ENGINE_GR_GK20A; u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block) >> ram_in_base_shift_v(); gk20a_dbg_info("bind channel %d inst ptr 0x%08x", ch_gk20a->hw_chid, inst_ptr); ch_gk20a->bound = true; gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & ~ccsr_channel_runlist_f(~0)) | ccsr_channel_runlist_f(engine_info->runlist_id)); gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_target_vid_mem_f() | ccsr_channel_inst_bind_true_f()); gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & ~ccsr_channel_enable_set_f(~0)) | ccsr_channel_enable_set_true_f()); } void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) { struct gk20a *g = ch_gk20a->g; gk20a_dbg_fn(""); if (ch_gk20a->bound) gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), ccsr_channel_inst_ptr_f(0) | ccsr_channel_inst_bind_false_f()); ch_gk20a->bound = false; /* * if we are agrressive then we can destroy the syncpt * resource at this point * if not, then it will be destroyed at channel_free() */ mutex_lock(&ch_gk20a->sync_lock); if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) { ch_gk20a->sync->destroy(ch_gk20a->sync); ch_gk20a->sync = NULL; } mutex_unlock(&ch_gk20a->sync_lock); } int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) { int err; gk20a_dbg_fn(""); err = gk20a_alloc_inst_block(g, &ch->inst_block); if (err) return err; gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block)); gk20a_dbg_fn("done"); return 0; } void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch) { gk20a_free_inst_block(g, &ch->inst_block); } static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) { return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true); } void channel_gk20a_enable(struct channel_gk20a *ch) { /* enable channel */ gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) | ccsr_channel_enable_set_true_f()); } void channel_gk20a_disable(struct channel_gk20a *ch) { /* disable channel */ gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) | ccsr_channel_enable_clr_true_f()); } void gk20a_channel_abort(struct channel_gk20a *ch) { struct channel_gk20a_job *job, *n; bool released_job_semaphore = false; gk20a_dbg_fn(""); /* make sure new kickoffs are prevented */ ch->has_timedout = true; ch->g->ops.fifo.disable_channel(ch); /* ensure no fences are pending */ mutex_lock(&ch->sync_lock); if (ch->sync) ch->sync->set_min_eq_max(ch->sync); mutex_unlock(&ch->sync_lock); /* release all job semaphores (applies only to jobs that use semaphore synchronization) */ mutex_lock(&ch->jobs_lock); list_for_each_entry_safe(job, n, &ch->jobs, list) { if (job->post_fence->semaphore) { gk20a_semaphore_release(job->post_fence->semaphore); released_job_semaphore = true; } } mutex_unlock(&ch->jobs_lock); if 
(released_job_semaphore) {
		wake_up_interruptible_all(&ch->semaphore_wq);
		gk20a_channel_update(ch, 0);
	}
}

int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	if (!channel_idle) {
		gk20a_err(dev_from_gk20a(ch->g),
			"jobs not freed for channel %d\n", ch->hw_chid);
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch,
			   bool finish,
			   unsigned long finish_timeout)
{
	gk20a_dbg_fn("");

	if (finish) {
		int err = gk20a_channel_finish(ch, finish_timeout);
		WARN_ON(err);
	}

	/* disable the channel from hw and increment syncpoints */
	gk20a_channel_abort(ch);

	gk20a_wait_channel_idle(ch);

	/* preempt the channel */
	ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);

	/* remove channel from runlist */
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)
static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
		       struct nvgpu_cycle_stats_args *args)
{
	struct dma_buf *dmabuf;
	void *virtual_address;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
		return -ENOSYS;

	if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		dmabuf = dma_buf_get(args->dmabuf_fd);
		if (IS_ERR(dmabuf))
			return PTR_ERR(dmabuf);
		virtual_address = dma_buf_vmap(dmabuf);
		if (!virtual_address)
			return -ENOMEM;

		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
		return 0;

	} else if (!args->dmabuf_fd &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->dmabuf_fd &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EINVAL;
	}
}

static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client)
		ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
	else
		ret = -EBADF;
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
				u32 dmabuf_fd,
				u32 perfmon_id_count,
				u32 *perfmon_id_start)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = -EEXIST;
	} else {
		ret = gr_gk20a_css_attach(ch->g,
					dmabuf_fd,
					perfmon_id_count,
					perfmon_id_start,
					&ch->cs_client);
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
		ch->cs_client = NULL;
	} else {
		ret = 0;
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
			struct nvgpu_cycle_stats_snapshot_args *args)
{
	int ret;

	/* is it allowed to handle calls for current GPU?
*/ if (0 == (ch->g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT)) return -ENOSYS; if (!args->dmabuf_fd) return -EINVAL; /* handle the command (most frequent cases first) */ switch (args->cmd) { case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: ret = gk20a_flush_cycle_stats_snapshot(ch); args->extra = 0; break; case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: ret = gk20a_attach_cycle_stats_snapshot(ch, args->dmabuf_fd, args->extra, &args->extra); break; case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: ret = gk20a_free_cycle_stats_snapshot(ch); args->extra = 0; break; default: pr_err("cyclestats: unknown command %u\n", args->cmd); ret = -EINVAL; break; } return ret; } #endif static int gk20a_init_error_notifier(struct channel_gk20a *ch, struct nvgpu_set_error_notifier *args) { void *va; struct dma_buf *dmabuf; if (!args->mem) { pr_err("gk20a_init_error_notifier: invalid memory handle\n"); return -EINVAL; } dmabuf = dma_buf_get(args->mem); if (ch->error_notifier_ref) gk20a_free_error_notifiers(ch); if (IS_ERR(dmabuf)) { pr_err("Invalid handle: %d\n", args->mem); return -EINVAL; } /* map handle */ va = dma_buf_vmap(dmabuf); if (!va) { dma_buf_put(dmabuf); pr_err("Cannot map notifier handle\n"); return -ENOMEM; } /* set channel notifiers pointer */ ch->error_notifier_ref = dmabuf; ch->error_notifier = va + args->offset; ch->error_notifier_va = va; memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); return 0; } void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) { if (ch->error_notifier_ref) { struct timespec time_data; u64 nsec; getnstimeofday(&time_data); nsec = ((u64)time_data.tv_sec) * 1000000000u + (u64)time_data.tv_nsec; ch->error_notifier->time_stamp.nanoseconds[0] = (u32)nsec; ch->error_notifier->time_stamp.nanoseconds[1] = (u32)(nsec >> 32); ch->error_notifier->info32 = error; ch->error_notifier->status = 0xffff; gk20a_err(dev_from_gk20a(ch->g), "error notifier set to %d for ch %d", error, ch->hw_chid); } } static void gk20a_free_error_notifiers(struct channel_gk20a *ch) { if (ch->error_notifier_ref) { dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); dma_buf_put(ch->error_notifier_ref); ch->error_notifier_ref = NULL; ch->error_notifier = NULL; ch->error_notifier_va = NULL; } } /* Returns delta of cyclic integers a and b. 
If a is ahead of b, delta * is positive */ static int cyclic_delta(int a, int b) { return a - b; } static void gk20a_wait_for_deferred_interrupts(struct gk20a *g) { int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count); int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count); /* wait until all stalling irqs are handled */ wait_event(g->sw_irq_stall_last_handled_wq, cyclic_delta(stall_irq_threshold, atomic_read(&g->sw_irq_stall_last_handled)) <= 0); /* wait until all non-stalling irqs are handled */ wait_event(g->sw_irq_nonstall_last_handled_wq, cyclic_delta(nonstall_irq_threshold, atomic_read(&g->sw_irq_nonstall_last_handled)) <= 0); } static void gk20a_wait_until_counter_is_N( struct channel_gk20a *ch, atomic_t *counter, int wait_value, wait_queue_head_t *wq, const char *caller, const char *counter_name) { while (true) { if (wait_event_timeout( *wq, atomic_read(counter) == wait_value, msecs_to_jiffies(5000)) > 0) break; gk20a_warn(dev_from_gk20a(ch->g), "%s: channel %d, still waiting, %s left: %d, waiting for: %d", caller, ch->hw_chid, counter_name, atomic_read(counter), wait_value); } } /* call ONLY when no references to the channel exist: after the last put */ static void gk20a_free_channel(struct channel_gk20a *ch) { struct gk20a *g = ch->g; struct fifo_gk20a *f = &g->fifo; struct gr_gk20a *gr = &g->gr; struct vm_gk20a *ch_vm = ch->vm; unsigned long timeout = gk20a_get_gr_idle_timeout(g); struct dbg_session_gk20a *dbg_s; bool was_reset; gk20a_dbg_fn(""); WARN_ON(ch->g == NULL); trace_gk20a_free_channel(ch->hw_chid); /* prevent new kickoffs */ ch->has_timedout = true; wmb(); /* wait until there's only our ref to the channel */ gk20a_wait_until_counter_is_N( ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, __func__, "references"); /* wait until all pending interrupts for recently completed * jobs are handled */ gk20a_wait_for_deferred_interrupts(g); /* prevent new refs */ spin_lock(&ch->ref_obtain_lock); if (!ch->referenceable) { spin_unlock(&ch->ref_obtain_lock); gk20a_err(dev_from_gk20a(ch->g), "Extra %s() called to channel %u", __func__, ch->hw_chid); return; } ch->referenceable = false; spin_unlock(&ch->ref_obtain_lock); /* matches with the initial reference in gk20a_open_new_channel() */ atomic_dec(&ch->ref_count); /* wait until no more refs to the channel */ gk20a_wait_until_counter_is_N( ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, __func__, "references"); /* if engine reset was deferred, perform it now */ mutex_lock(&f->deferred_reset_mutex); if (g->fifo.deferred_reset_pending) { gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" " deferred, running now"); was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex); mutex_lock(&g->fifo.gr_reset_mutex); /* if lock is already taken, a reset is taking place so no need to repeat */ if (!was_reset) { gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines); } mutex_unlock(&g->fifo.gr_reset_mutex); g->fifo.deferred_fault_engines = 0; g->fifo.deferred_reset_pending = false; } mutex_unlock(&f->deferred_reset_mutex); if (!ch->bound) goto release; if (!gk20a_channel_as_bound(ch)) goto unbind; gk20a_dbg_info("freeing bound channel context, timeout=%ld", timeout); gk20a_disable_channel(ch, !ch->has_timedout, timeout); gk20a_free_error_notifiers(ch); /* release channel ctx */ g->ops.gr.free_channel_ctx(ch); gk20a_gr_flush_channel_tlb(gr); memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); #if 
defined(CONFIG_GK20A_CYCLE_STATS) gk20a_free_cycle_stats_buffer(ch); gk20a_free_cycle_stats_snapshot(ch); #endif channel_gk20a_free_priv_cmdbuf(ch); /* sync must be destroyed before releasing channel vm */ mutex_lock(&ch->sync_lock); if (ch->sync) { ch->sync->destroy(ch->sync); ch->sync = NULL; } mutex_unlock(&ch->sync_lock); /* release channel binding to the as_share */ if (ch_vm->as_share) gk20a_as_release_share(ch_vm->as_share); else gk20a_vm_put(ch_vm); spin_lock(&ch->update_fn_lock); ch->update_fn = NULL; ch->update_fn_data = NULL; spin_unlock(&ch->update_fn_lock); cancel_work_sync(&ch->update_fn_work); /* make sure we don't have deferred interrupts pending that * could still touch the channel */ gk20a_wait_for_deferred_interrupts(g); unbind: if (gk20a_is_channel_marked_as_tsg(ch)) gk20a_tsg_unbind_channel(ch); g->ops.fifo.unbind_channel(ch); g->ops.fifo.free_inst(g, ch); ch->vpr = false; ch->vm = NULL; mutex_lock(&ch->submit_lock); gk20a_fence_put(ch->last_submit.pre_fence); gk20a_fence_put(ch->last_submit.post_fence); ch->last_submit.pre_fence = NULL; ch->last_submit.post_fence = NULL; mutex_unlock(&ch->submit_lock); WARN_ON(ch->sync); /* unlink all debug sessions */ mutex_lock(&ch->dbg_s_lock); list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) { dbg_s->ch = NULL; list_del_init(&dbg_s->dbg_s_list_node); } mutex_unlock(&ch->dbg_s_lock); release: /* make sure we catch accesses of unopened channels in case * there's non-refcounted channel pointers hanging around */ ch->g = NULL; wmb(); /* ALWAYS last */ free_channel(f, ch); } /* Try to get a reference to the channel. Return nonzero on success. If fails, * the channel is dead or being freed elsewhere and you must not touch it. * * Always when a channel_gk20a pointer is seen and about to be used, a * reference must be held to it - either by you or the caller, which should be * documented well or otherwise clearly seen. This usually boils down to the * file from ioctls directly, or an explicit get in exception handlers when the * channel is found by a hw_chid. * * Most global functions in this file require a reference to be held by the * caller. */ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, const char *caller) { struct channel_gk20a *ret; spin_lock(&ch->ref_obtain_lock); if (likely(ch->referenceable)) { atomic_inc(&ch->ref_count); ret = ch; } else ret = NULL; spin_unlock(&ch->ref_obtain_lock); if (ret) trace_gk20a_channel_get(ch->hw_chid, caller); return ret; } void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) { trace_gk20a_channel_put(ch->hw_chid, caller); atomic_dec(&ch->ref_count); wake_up_all(&ch->ref_count_dec_wq); /* More puts than gets. Channel is probably going to get * stuck. */ WARN_ON(atomic_read(&ch->ref_count) < 0); /* Also, more puts than gets. ref_count can go to 0 only if * the channel is closing. Channel is probably going to get * stuck. */ WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable); } void gk20a_channel_close(struct channel_gk20a *ch) { gk20a_free_channel(ch); } int gk20a_channel_release(struct inode *inode, struct file *filp) { struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data; struct gk20a *g = ch ? 
ch->g : NULL; int err; if (!ch) return 0; trace_gk20a_channel_release(dev_name(&g->dev->dev)); err = gk20a_busy(g->dev); if (err) { gk20a_err(dev_from_gk20a(g), "failed to release channel %d", ch->hw_chid); return err; } gk20a_channel_close(ch); gk20a_idle(g->dev); filp->private_data = NULL; return 0; } static void gk20a_channel_update_runcb_fn(struct work_struct *work) { struct channel_gk20a *ch = container_of(work, struct channel_gk20a, update_fn_work); void (*update_fn)(struct channel_gk20a *, void *); void *update_fn_data; spin_lock(&ch->update_fn_lock); update_fn = ch->update_fn; update_fn_data = ch->update_fn_data; spin_unlock(&ch->update_fn_lock); if (update_fn) update_fn(ch, update_fn_data); } struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, void (*update_fn)(struct channel_gk20a *, void *), void *update_fn_data) { struct channel_gk20a *ch = gk20a_open_new_channel(g); if (ch) { spin_lock(&ch->update_fn_lock); ch->update_fn = update_fn; ch->update_fn_data = update_fn_data; spin_unlock(&ch->update_fn_lock); } return ch; } struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; struct channel_gk20a *ch; gk20a_dbg_fn(""); ch = allocate_channel(f); if (ch == NULL) { /* TBD: we want to make this virtualizable */ gk20a_err(dev_from_gk20a(g), "out of hw chids"); return NULL; } trace_gk20a_open_new_channel(ch->hw_chid); BUG_ON(ch->g); ch->g = g; if (g->ops.fifo.alloc_inst(g, ch)) { ch->g = NULL; free_channel(f, ch); gk20a_err(dev_from_gk20a(g), "failed to open gk20a channel, out of inst mem"); return NULL; } /* now the channel is in a limbo out of the free list but not marked as * alive and used (i.e. get-able) yet */ ch->pid = current->pid; /* By default, channel is regular (non-TSG) channel */ ch->tsgid = NVGPU_INVALID_TSG_ID; /* reset timeout counter and update timestamp */ ch->timeout_accumulated_ms = 0; ch->timeout_gpfifo_get = 0; /* set gr host default timeout */ ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); ch->timeout_debug_dump = true; ch->has_timedout = false; ch->obj_class = 0; /* The channel is *not* runnable at this point. It still needs to have * an address space bound and allocate a gpfifo and grctx. */ init_waitqueue_head(&ch->notifier_wq); init_waitqueue_head(&ch->semaphore_wq); init_waitqueue_head(&ch->submit_wq); mutex_init(&ch->poll_events.lock); ch->poll_events.events_enabled = false; ch->poll_events.num_pending_events = 0; ch->update_fn = NULL; ch->update_fn_data = NULL; spin_lock_init(&ch->update_fn_lock); INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); /* Mark the channel alive, get-able, with 1 initial use * references. 
	 * The initial reference will be decreased in
	 * gk20a_free_channel() */
	ch->referenceable = true;
	atomic_set(&ch->ref_count, 1);
	wmb();

	return ch;
}

static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
{
	int err;
	struct channel_gk20a *ch;

	trace_gk20a_channel_open(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
		return err;
	}
	ch = gk20a_open_new_channel(g);
	gk20a_idle(g->dev);
	if (!ch) {
		gk20a_err(dev_from_gk20a(g), "failed to get f");
		return -ENOMEM;
	}

	filp->private_data = ch;
	return 0;
}

int gk20a_channel_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g = container_of(inode->i_cdev,
			struct gk20a, channel.cdev);
	int ret;

	gk20a_dbg_fn("start");
	ret = __gk20a_channel_open(g, filp);
	gk20a_dbg_fn("end");
	return ret;
}

int gk20a_channel_open_ioctl(struct gk20a *g,
		struct nvgpu_channel_open_args *args)
{
	int err;
	int fd;
	struct file *file;
	char *name;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	fd = err;

	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
			dev_name(&g->dev->dev), fd);
	if (!name) {
		err = -ENOMEM;
		goto clean_up;
	}

	file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	err = __gk20a_channel_open(g, file);
	if (err)
		goto clean_up_file;

	fd_install(fd, file);
	args->channel_fd = fd;
	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(fd);
	return err;
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 size;
	int err = 0;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case, if the kernel adds both of them for every user gpfifo,
	   the max size of priv_cmdbuf is:
	   gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);

	err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	q->size = q->mem.size / sizeof(u32);

	INIT_LIST_HEAD(&q->head);
	INIT_LIST_HEAD(&q->free);

	return 0;

clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *pos, *tmp, *head;

	if (q->size == 0)
		return;

	gk20a_gmmu_unmap_free(ch_vm, &q->mem);

	/* free used list */
	head = &q->head;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		free_priv_cmdbuf(c, e);
	}

	/* free free list */
	head = &q->free;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		kfree(e);
	}

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size.
size is number of u32 entries */ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, struct priv_cmd_entry **entry) { struct priv_cmd_queue *q = &c->priv_cmd_q; struct priv_cmd_entry *e; u32 free_count; u32 size = orig_size; bool no_retry = false; gk20a_dbg_fn("size %d", orig_size); *entry = NULL; /* if free space in the end is less than requested, increase the size * to make the real allocated space start from beginning. */ if (q->put + size > q->size) size = orig_size + (q->size - q->put); gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d", c->hw_chid, q->get, q->put); TRY_AGAIN: free_count = (q->size - (q->put - q->get) - 1) % q->size; if (size > free_count) { if (!no_retry) { recycle_priv_cmdbuf(c); no_retry = true; goto TRY_AGAIN; } else return -EAGAIN; } e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); if (!e) { gk20a_err(dev_from_gk20a(c->g), "ch %d: fail to allocate priv cmd entry", c->hw_chid); return -ENOMEM; } e->size = orig_size; e->gp_get = c->gpfifo.get; e->gp_put = c->gpfifo.put; e->gp_wrap = c->gpfifo.wrap; /* if we have increased size to skip free space in the end, set put to beginning of cmd buffer (0) + size */ if (size != orig_size) { e->ptr = (u32 *)q->mem.cpu_va; e->gva = q->mem.gpu_va; q->put = orig_size; } else { e->ptr = (u32 *)q->mem.cpu_va + q->put; e->gva = q->mem.gpu_va + q->put * sizeof(u32); q->put = (q->put + orig_size) & (q->size - 1); } /* we already handled q->put + size > q->size so BUG_ON this */ BUG_ON(q->put > q->size); /* add new entry to head since we free from head */ list_add(&e->list, &q->head); *entry = e; gk20a_dbg_fn("done"); return 0; } /* Don't call this to free an explict cmd entry. * It doesn't update priv_cmd_queue get/put */ static void free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) { if (!e) return; list_del(&e->list); kfree(e); } /* free entries if they're no longer being used */ static void recycle_priv_cmdbuf(struct channel_gk20a *c) { struct priv_cmd_queue *q = &c->priv_cmd_q; struct priv_cmd_entry *e, *tmp; struct list_head *head = &q->head; bool wrap_around, found = false; gk20a_dbg_fn(""); /* Find the most recent free entry. Free it and everything before it */ list_for_each_entry(e, head, list) { gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d " "curr get:put:wrap %d:%d:%d", c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap, c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap); wrap_around = (c->gpfifo.wrap != e->gp_wrap); if (e->gp_get < e->gp_put) { if (c->gpfifo.get >= e->gp_put || wrap_around) { found = true; break; } else e->gp_get = c->gpfifo.get; } else if (e->gp_get > e->gp_put) { if (wrap_around && c->gpfifo.get >= e->gp_put) { found = true; break; } else e->gp_get = c->gpfifo.get; } } if (found) q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size; else { gk20a_dbg_info("no free entry recycled"); return; } list_for_each_entry_safe_continue(e, tmp, head, list) { free_priv_cmdbuf(c, e); } gk20a_dbg_fn("done"); } int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvgpu_alloc_gpfifo_args *args) { struct gk20a *g = c->g; struct device *d = dev_from_gk20a(g); struct vm_gk20a *ch_vm; u32 gpfifo_size; int err = 0; /* Kernel can insert one extra gpfifo entry before user submitted gpfifos and another one after, for internal usage. Triple the requested size. */ gpfifo_size = roundup_pow_of_two(args->num_entries * 3); if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED) c->vpr = true; /* an address space needs to have been bound at this point. 
*/ if (!gk20a_channel_as_bound(c)) { gk20a_err(d, "not bound to an address space at time of gpfifo" " allocation."); return -EINVAL; } ch_vm = c->vm; c->cmds_pending = false; mutex_lock(&c->submit_lock); gk20a_fence_put(c->last_submit.pre_fence); gk20a_fence_put(c->last_submit.post_fence); c->last_submit.pre_fence = NULL; c->last_submit.post_fence = NULL; mutex_unlock(&c->submit_lock); c->ramfc.offset = 0; c->ramfc.size = ram_in_ramfc_s() / 8; if (c->gpfifo.mem.cpu_va) { gk20a_err(d, "channel %d :" "gpfifo already allocated", c->hw_chid); return -EEXIST; } err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct nvgpu_gpfifo), &c->gpfifo.mem); if (err) { gk20a_err(d, "%s: memory allocation failed\n", __func__); goto clean_up; } c->gpfifo.entry_num = gpfifo_size; c->gpfifo.get = c->gpfifo.put = 0; gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); channel_gk20a_setup_userd(c); err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num, args->flags); if (err) goto clean_up_unmap; /* TBD: setup engine contexts */ err = channel_gk20a_alloc_priv_cmdbuf(c); if (err) goto clean_up_unmap; err = channel_gk20a_update_runlist(c, true); if (err) goto clean_up_unmap; g->ops.fifo.bind_channel(c); gk20a_dbg_fn("done"); return 0; clean_up_unmap: gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); clean_up: memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); gk20a_err(d, "fail"); return err; } static inline bool check_gp_put(struct gk20a *g, struct channel_gk20a *c) { u32 put; /* gp_put changed unexpectedly since last update? */ put = gk20a_bar1_readl(g, c->userd_gpu_va + 4 * ram_userd_gp_put_w()); if (c->gpfifo.put != put) { /*TBD: BUG_ON/teardown on this*/ gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly " "since last update, channel put = %u, ram put = %u\n", c->gpfifo.put, put); c->gpfifo.put = put; return false; /* surprise! */ } return true; /* checked out ok */ } /* Update with this periodically to determine how the gpfifo is draining. 
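 * A new GET value smaller than the cached one means the hardware has wrapped
 * around the ring, so the wrap flag is toggled.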
*/ static inline u32 update_gp_get(struct gk20a *g, struct channel_gk20a *c) { u32 new_get = gk20a_bar1_readl(g, c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); if (new_get < c->gpfifo.get) c->gpfifo.wrap = !c->gpfifo.wrap; c->gpfifo.get = new_get; return new_get; } static inline u32 gp_free_count(struct channel_gk20a *c) { return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % c->gpfifo.entry_num; } bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, u32 timeout_delta_ms) { u32 gpfifo_get = update_gp_get(ch->g, ch); /* Count consequent timeout isr */ if (gpfifo_get == ch->timeout_gpfifo_get) { /* we didn't advance since previous channel timeout check */ ch->timeout_accumulated_ms += timeout_delta_ms; } else { /* first timeout isr encountered */ ch->timeout_accumulated_ms = timeout_delta_ms; } ch->timeout_gpfifo_get = gpfifo_get; return ch->g->timeouts_enabled && ch->timeout_accumulated_ms > ch->timeout_ms_max; } static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch) { struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled && platform->ch_wdt_timeout_ms) return platform->ch_wdt_timeout_ms; else return (u32)MAX_SCHEDULE_TIMEOUT; } static u32 get_gp_free_count(struct channel_gk20a *c) { update_gp_get(c->g, c); return gp_free_count(c); } static void trace_write_pushbuffer(struct channel_gk20a *c, struct nvgpu_gpfifo *g) { void *mem = NULL; unsigned int words; u64 offset; struct dma_buf *dmabuf = NULL; if (gk20a_debug_trace_cmdbuf) { u64 gpu_va = (u64)g->entry0 | (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); int err; words = pbdma_gp_entry1_length_v(g->entry1); err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); if (!err) mem = dma_buf_vmap(dmabuf); } if (mem) { u32 i; /* * Write in batches of 128 as there seems to be a limit * of how much you can output to ftrace at once. 
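 * Each trace_gk20a_push_cmdbuf() call in the loop below therefore reports
 * at most min(words - i, 128U) words.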
*/ for (i = 0; i < words; i += 128U) { trace_gk20a_push_cmdbuf( c->g->dev->name, 0, min(words - i, 128U), offset + i * sizeof(u32), mem); } dma_buf_vunmap(dmabuf, mem); } } static void trace_write_pushbuffer_range(struct channel_gk20a *c, struct nvgpu_gpfifo *g, int count) { if (gk20a_debug_trace_cmdbuf) { int i; struct nvgpu_gpfifo *gp = g; for (i = 0; i < count; i++, gp++) trace_write_pushbuffer(c, gp); } } static void gk20a_channel_timeout_start(struct channel_gk20a *ch, struct channel_gk20a_job *job) { mutex_lock(&ch->timeout.lock); if (ch->timeout.initialized) { mutex_unlock(&ch->timeout.lock); return; } ch->timeout.job = job; ch->timeout.initialized = true; schedule_delayed_work(&ch->timeout.wq, msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch))); mutex_unlock(&ch->timeout.lock); } static void gk20a_channel_timeout_stop(struct channel_gk20a *ch) { mutex_lock(&ch->timeout.lock); if (!ch->timeout.initialized) { mutex_unlock(&ch->timeout.lock); return; } mutex_unlock(&ch->timeout.lock); cancel_delayed_work_sync(&ch->timeout.wq); mutex_lock(&ch->timeout.lock); ch->timeout.initialized = false; mutex_unlock(&ch->timeout.lock); } void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) { u32 chid; struct fifo_gk20a *f = &g->fifo; for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; if (gk20a_channel_get(ch)) { mutex_lock(&ch->timeout.lock); if (!ch->timeout.initialized) { mutex_unlock(&ch->timeout.lock); gk20a_channel_put(ch); continue; } mutex_unlock(&ch->timeout.lock); cancel_delayed_work_sync(&ch->timeout.wq); if (!ch->has_timedout) schedule_delayed_work(&ch->timeout.wq, msecs_to_jiffies( gk20a_get_channel_watchdog_timeout(ch))); gk20a_channel_put(ch); } } } static void gk20a_channel_timeout_handler(struct work_struct *work) { struct channel_gk20a_job *job; struct gk20a *g; struct channel_gk20a *ch; struct channel_gk20a *failing_ch; u32 engine_id; int id = -1; bool is_tsg = false; ch = container_of(to_delayed_work(work), struct channel_gk20a, timeout.wq); ch = gk20a_channel_get(ch); if (!ch) return; g = ch->g; /* Need global lock since multiple channels can timeout at a time */ mutex_lock(&g->ch_wdt_lock); /* Get timed out job and reset the timer */ mutex_lock(&ch->timeout.lock); job = ch->timeout.job; ch->timeout.initialized = false; mutex_unlock(&ch->timeout.lock); if (gk20a_fifo_disable_all_engine_activity(g, true)) goto fail_unlock; if (gk20a_fence_is_expired(job->post_fence)) goto fail_enable_engine_activity; gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n", ch->hw_chid); /* Get failing engine data */ engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); if (engine_id >= g->fifo.max_engines) { /* If no failing engine, abort the channels */ if (gk20a_is_channel_marked_as_tsg(ch)) { struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); gk20a_fifo_abort_tsg(g, ch->tsgid); } else { gk20a_fifo_set_ctx_mmu_error_ch(g, ch); gk20a_channel_abort(ch); } } else { /* If failing engine, trigger recovery */ failing_ch = gk20a_channel_get(&g->fifo.channel[id]); if (!failing_ch) goto fail_enable_engine_activity; if (failing_ch->hw_chid != ch->hw_chid) gk20a_channel_timeout_start(ch, job); gk20a_fifo_recover(g, BIT(engine_id), failing_ch->hw_chid, is_tsg, true, failing_ch->timeout_debug_dump); gk20a_channel_put(failing_ch); } fail_enable_engine_activity: gk20a_fifo_enable_all_engine_activity(g); fail_unlock: mutex_unlock(&g->ch_wdt_lock); gk20a_channel_put(ch); } static int 
gk20a_channel_add_job(struct channel_gk20a *c, struct gk20a_fence *pre_fence, struct gk20a_fence *post_fence) { struct vm_gk20a *vm = c->vm; struct channel_gk20a_job *job = NULL; struct mapped_buffer_node **mapped_buffers = NULL; int err = 0, num_mapped_buffers; /* job needs reference to this vm (released in channel_update) */ gk20a_vm_get(vm); err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); if (err) { gk20a_vm_put(vm); return err; } job = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) { gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); gk20a_vm_put(vm); return -ENOMEM; } /* put() is done in gk20a_channel_update() when the job is done */ c = gk20a_channel_get(c); if (c) { job->num_mapped_buffers = num_mapped_buffers; job->mapped_buffers = mapped_buffers; job->pre_fence = gk20a_fence_get(pre_fence); job->post_fence = gk20a_fence_get(post_fence); gk20a_channel_timeout_start(c, job); mutex_lock(&c->jobs_lock); list_add_tail(&job->list, &c->jobs); mutex_unlock(&c->jobs_lock); } else { return -ETIMEDOUT; } return 0; } void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) { struct vm_gk20a *vm = c->vm; struct channel_gk20a_job *job, *n; trace_gk20a_channel_update(c->hw_chid); wake_up(&c->submit_wq); mutex_lock(&c->submit_lock); mutex_lock(&c->jobs_lock); list_for_each_entry_safe(job, n, &c->jobs, list) { struct gk20a *g = c->g; bool completed = gk20a_fence_is_expired(job->post_fence); if (!completed) { gk20a_channel_timeout_start(c, job); break; } gk20a_channel_timeout_stop(c); if (c->sync) c->sync->signal_timeline(c->sync); gk20a_vm_put_buffers(vm, job->mapped_buffers, job->num_mapped_buffers); /* Close the fences (this will unref the semaphores and release * them to the pool). */ gk20a_fence_put(job->pre_fence); gk20a_fence_put(job->post_fence); /* job is done. release its vm reference (taken in add_job) */ gk20a_vm_put(vm); /* another bookkeeping taken in add_job. caller must hold a ref * so this wouldn't get freed here. */ gk20a_channel_put(c); list_del_init(&job->list); kfree(job); gk20a_idle(g->dev); } /* * If job list is empty then channel is idle and we can free * the syncpt here (given aggressive_destroy flag is set) * Note: check if last submit is complete before destroying * the sync resource */ if (list_empty(&c->jobs)) { mutex_lock(&c->sync_lock); if (c->sync && c->sync->aggressive_destroy && gk20a_fence_is_expired(c->last_submit.post_fence)) { c->sync->destroy(c->sync); c->sync = NULL; } mutex_unlock(&c->sync_lock); } mutex_unlock(&c->jobs_lock); mutex_unlock(&c->submit_lock); if (c->update_fn) schedule_work(&c->update_fn_work); } int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, struct nvgpu_gpfifo *gpfifo, u32 num_entries, u32 flags, struct nvgpu_fence *fence, struct gk20a_fence **fence_out) { struct gk20a *g = c->g; struct device *d = dev_from_gk20a(g); int err = 0; int start, end; int wait_fence_fd = -1; struct priv_cmd_entry *wait_cmd = NULL; struct priv_cmd_entry *incr_cmd = NULL; struct gk20a_fence *pre_fence = NULL; struct gk20a_fence *post_fence = NULL; /* we might need two extra gpfifo entries - one for pre fence * and one for post fence. */ const int extra_entries = 2; bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; if (c->has_timedout) return -ETIMEDOUT; /* fifo not large enough for request. Return error immediately. * Kernel can insert gpfifo entries before and after user gpfifos. * So, add extra_entries in user request. 
	 * Also, HW with fifo size N can accept only N-1 entries and so the
	 * below condition */
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		gk20a_err(d, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			"not bound to an address space at time of gpfifo"
			" submission.");
		return -EINVAL;
	}

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	if (g->ops.ltc.sync_debugfs)
		g->ops.ltc.sync_debugfs(g);
#endif

	gk20a_dbg_info("channel %d", c->hw_chid);

	/* gk20a_channel_update releases this ref. */
	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(d, "failed to host gk20a to submit gpfifo");
		return err;
	}

	trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
					  c->hw_chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);
	check_gp_put(g, c);
	update_gp_get(g, c);

	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		/* we can get here via locked ioctl and other paths too */
		int locked_path = mutex_is_locked(&c->ioctl_lock);
		if (locked_path)
			mutex_unlock(&c->ioctl_lock);

		trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->has_timedout);
		trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);

		if (locked_path)
			mutex_lock(&c->ioctl_lock);
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (err) {
		gk20a_err(d, "timeout waiting for gpfifo space");
		err = -EAGAIN;
		goto clean_up;
	}

	mutex_lock(&c->submit_lock);

	mutex_lock(&c->sync_lock);
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync) {
			err = -ENOMEM;
			mutex_unlock(&c->sync_lock);
			mutex_unlock(&c->submit_lock);
			goto clean_up;
		}
		if (g->ops.fifo.resetup_ramfc)
			err = g->ops.fifo.resetup_ramfc(c);
		if (err) {
			/* drop the locks taken above so that clean_up can
			 * idle the GPU */
			mutex_unlock(&c->sync_lock);
			mutex_unlock(&c->submit_lock);
			goto clean_up;
		}
	}
	mutex_unlock(&c->sync_lock);

	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
* validate that the id makes sense, elide if not * the only reason this isn't being unceremoniously killed is to * keep running some tests which trigger this condition */ if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { wait_fence_fd = fence->id; err = c->sync->wait_fd(c->sync, wait_fence_fd, &wait_cmd, &pre_fence); } else { err = c->sync->wait_syncpt(c->sync, fence->id, fence->value, &wait_cmd, &pre_fence); } } if (err) { mutex_unlock(&c->submit_lock); goto clean_up; } /* always insert syncpt increment at end of gpfifo submission to keep track of method completion for idle railgating */ if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, &post_fence, need_wfi); else err = c->sync->incr(c->sync, &incr_cmd, &post_fence); if (err) { mutex_unlock(&c->submit_lock); goto clean_up; } if (wait_cmd) { gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(wait_cmd->gva); gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) | pbdma_gp_entry1_length_f(wait_cmd->size); trace_gk20a_push_cmdbuf(c->g->dev->name, 0, wait_cmd->size, 0, wait_cmd->ptr); c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); /* save gp_put */ wait_cmd->gp_put = c->gpfifo.put; } /* * Copy source gpfifo entries into the gpfifo ring buffer, * potentially splitting into two memcpies to handle the * ring buffer wrap-around case. */ start = c->gpfifo.put; end = start + num_entries; if (end > c->gpfifo.entry_num) { int length0 = c->gpfifo.entry_num - start; int length1 = num_entries - length0; memcpy(gpfifo_mem + start, gpfifo, length0 * sizeof(*gpfifo)); memcpy(gpfifo_mem, gpfifo + length0, length1 * sizeof(*gpfifo)); trace_write_pushbuffer_range(c, gpfifo, length0); trace_write_pushbuffer_range(c, gpfifo + length0, length1); } else { memcpy(gpfifo_mem + start, gpfifo, num_entries * sizeof(*gpfifo)); trace_write_pushbuffer_range(c, gpfifo, num_entries); } c->gpfifo.put = (c->gpfifo.put + num_entries) & (c->gpfifo.entry_num - 1); if (incr_cmd) { gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(incr_cmd->gva); gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) | pbdma_gp_entry1_length_f(incr_cmd->size); trace_gk20a_push_cmdbuf(c->g->dev->name, 0, incr_cmd->size, 0, incr_cmd->ptr); c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); /* save gp_put */ incr_cmd->gp_put = c->gpfifo.put; } gk20a_fence_put(c->last_submit.pre_fence); gk20a_fence_put(c->last_submit.post_fence); c->last_submit.pre_fence = pre_fence; c->last_submit.post_fence = post_fence; if (fence_out) *fence_out = gk20a_fence_get(post_fence); /* TODO! Check for errors... 
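 * (the return value of gk20a_channel_add_job() below is not checked)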
*/ gk20a_channel_add_job(c, pre_fence, post_fence); c->cmds_pending = true; gk20a_bar1_writel(g, c->userd_gpu_va + 4 * ram_userd_gp_put_w(), c->gpfifo.put); mutex_unlock(&c->submit_lock); trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, c->hw_chid, num_entries, flags, post_fence->syncpt_id, post_fence->syncpt_value); gk20a_dbg_info("post-submit put %d, get %d, size %d", c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); gk20a_dbg_fn("done"); return err; clean_up: gk20a_err(d, "fail"); free_priv_cmdbuf(c, wait_cmd); free_priv_cmdbuf(c, incr_cmd); gk20a_fence_put(pre_fence); gk20a_fence_put(post_fence); gk20a_idle(g->dev); return err; } int gk20a_init_channel_support(struct gk20a *g, u32 chid) { struct channel_gk20a *c = g->fifo.channel+chid; c->g = NULL; c->hw_chid = chid; c->bound = false; spin_lock_init(&c->ref_obtain_lock); atomic_set(&c->ref_count, 0); c->referenceable = false; init_waitqueue_head(&c->ref_count_dec_wq); mutex_init(&c->ioctl_lock); mutex_init(&c->jobs_lock); mutex_init(&c->submit_lock); mutex_init(&c->timeout.lock); mutex_init(&c->sync_lock); INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); INIT_LIST_HEAD(&c->jobs); #if defined(CONFIG_GK20A_CYCLE_STATS) mutex_init(&c->cyclestate.cyclestate_buffer_mutex); mutex_init(&c->cs_client_mutex); #endif INIT_LIST_HEAD(&c->dbg_s_list); mutex_init(&c->dbg_s_lock); list_add(&c->free_chs, &g->fifo.free_chs); return 0; } int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) { int err = 0; struct gk20a_fence *fence = ch->last_submit.post_fence; if (!ch->cmds_pending) return 0; /* Do not wait for a timedout channel */ if (ch->has_timedout) return -ETIMEDOUT; gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p", fence->syncpt_value, fence->semaphore); err = gk20a_fence_wait(fence, timeout); if (WARN_ON(err)) dev_warn(dev_from_gk20a(ch->g), "timed out waiting for gk20a channel to finish"); else ch->cmds_pending = false; return err; } static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, ulong id, u32 offset, u32 payload, long timeout) { struct platform_device *pdev = ch->g->dev; struct dma_buf *dmabuf; void *data; u32 *semaphore; int ret = 0; long remain; /* do not wait if channel has timed out */ if (ch->has_timedout) return -ETIMEDOUT; dmabuf = dma_buf_get(id); if (IS_ERR(dmabuf)) { gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx", id); return -EINVAL; } data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); if (!data) { gk20a_err(&pdev->dev, "failed to map notifier memory"); ret = -EINVAL; goto cleanup_put; } semaphore = data + (offset & ~PAGE_MASK); remain = wait_event_interruptible_timeout( ch->semaphore_wq, *semaphore == payload || ch->has_timedout, timeout); if (remain == 0 && *semaphore != payload) ret = -ETIMEDOUT; else if (remain < 0) ret = remain; dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); cleanup_put: dma_buf_put(dmabuf); return ret; } static int gk20a_channel_wait(struct channel_gk20a *ch, struct nvgpu_wait_args *args) { struct device *d = dev_from_gk20a(ch->g); struct dma_buf *dmabuf; struct notification *notif; struct timespec tv; u64 jiffies; ulong id; u32 offset; unsigned long timeout; int remain, ret = 0; gk20a_dbg_fn(""); if (ch->has_timedout) return -ETIMEDOUT; if (args->timeout == NVGPU_NO_TIMEOUT) timeout = MAX_SCHEDULE_TIMEOUT; else timeout = (u32)msecs_to_jiffies(args->timeout); switch (args->type) { case NVGPU_WAIT_TYPE_NOTIFIER: id = args->condition.notifier.dmabuf_fd; offset = args->condition.notifier.offset; dmabuf = 
dma_buf_get(id); if (IS_ERR(dmabuf)) { gk20a_err(d, "invalid notifier nvmap handle 0x%lx", id); return -EINVAL; } notif = dma_buf_vmap(dmabuf); if (!notif) { gk20a_err(d, "failed to map notifier memory"); return -ENOMEM; } notif = (struct notification *)((uintptr_t)notif + offset); /* user should set status pending before * calling this ioctl */ remain = wait_event_interruptible_timeout( ch->notifier_wq, notif->status == 0 || ch->has_timedout, timeout); if (remain == 0 && notif->status != 0) { ret = -ETIMEDOUT; goto notif_clean_up; } else if (remain < 0) { ret = -EINTR; goto notif_clean_up; } /* TBD: fill in correct information */ jiffies = get_jiffies_64(); jiffies_to_timespec(jiffies, &tv); notif->timestamp.nanoseconds[0] = tv.tv_nsec; notif->timestamp.nanoseconds[1] = tv.tv_sec; notif->info32 = 0xDEADBEEF; /* should be object name */ notif->info16 = ch->hw_chid; /* should be method offset */ notif_clean_up: dma_buf_vunmap(dmabuf, notif); return ret; case NVGPU_WAIT_TYPE_SEMAPHORE: ret = gk20a_channel_wait_semaphore(ch, args->condition.semaphore.dmabuf_fd, args->condition.semaphore.offset, args->condition.semaphore.payload, timeout); break; default: ret = -EINVAL; break; } return ret; } /* poll events for semaphores */ static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev) { gk20a_dbg_fn(""); mutex_lock(&ev->lock); ev->events_enabled = true; ev->num_pending_events = 0; mutex_unlock(&ev->lock); } static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev) { gk20a_dbg_fn(""); mutex_lock(&ev->lock); ev->events_enabled = false; ev->num_pending_events = 0; mutex_unlock(&ev->lock); } static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev) { gk20a_dbg_fn(""); mutex_lock(&ev->lock); if (ev->events_enabled && ev->num_pending_events > 0) ev->num_pending_events--; mutex_unlock(&ev->lock); } static int gk20a_channel_events_ctrl(struct channel_gk20a *ch, struct nvgpu_channel_events_ctrl_args *args) { int ret = 0; gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "channel events ctrl cmd %d", args->cmd); switch (args->cmd) { case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE: gk20a_channel_events_enable(&ch->poll_events); break; case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE: gk20a_channel_events_disable(&ch->poll_events); break; case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR: gk20a_channel_events_clear(&ch->poll_events); break; default: gk20a_err(dev_from_gk20a(ch->g), "unrecognized channel events ctrl cmd: 0x%x", args->cmd); ret = -EINVAL; break; } return ret; } void gk20a_channel_event(struct channel_gk20a *ch) { mutex_lock(&ch->poll_events.lock); if (ch->poll_events.events_enabled) { gk20a_dbg_info("posting event on channel id %d", ch->hw_chid); gk20a_dbg_info("%d channel events pending", ch->poll_events.num_pending_events); ch->poll_events.num_pending_events++; /* not waking up here, caller does that */ } mutex_unlock(&ch->poll_events.lock); } unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait) { unsigned int mask = 0; struct channel_gk20a *ch = filep->private_data; gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, ""); poll_wait(filep, &ch->semaphore_wq, wait); mutex_lock(&ch->poll_events.lock); if (ch->poll_events.events_enabled && ch->poll_events.num_pending_events > 0) { gk20a_dbg_info("found pending event on channel id %d", ch->hw_chid); gk20a_dbg_info("%d channel events pending", ch->poll_events.num_pending_events); mask = (POLLPRI | POLLIN); } mutex_unlock(&ch->poll_events.lock); return mask; } static int 
gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) { u32 timeslice_timeout; if (gk20a_is_channel_marked_as_tsg(ch)) { gk20a_err(dev_from_gk20a(ch->g), "invalid operation for TSG!\n"); return -EINVAL; } /* set priority of graphics channel */ switch (priority) { case NVGPU_PRIORITY_LOW: timeslice_timeout = ch->g->timeslice_low_priority_us; break; case NVGPU_PRIORITY_MEDIUM: timeslice_timeout = ch->g->timeslice_medium_priority_us; break; case NVGPU_PRIORITY_HIGH: timeslice_timeout = ch->g->timeslice_high_priority_us; break; default: pr_err("Unsupported priority"); return -EINVAL; } channel_gk20a_set_schedule_params(ch, timeslice_timeout); return 0; } static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, struct nvgpu_zcull_bind_args *args) { struct gk20a *g = ch->g; struct gr_gk20a *gr = &g->gr; gk20a_dbg_fn(""); return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, args->gpu_va, args->mode); } /* in this context the "channel" is the host1x channel which * maps to *all* gk20a channels */ int gk20a_channel_suspend(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; u32 chid; bool channels_in_use = false; int err; gk20a_dbg_fn(""); /* wait for engine idle */ err = g->ops.fifo.wait_engine_idle(g); if (err) return err; for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; if (gk20a_channel_get(ch)) { gk20a_dbg_info("suspend channel %d", chid); /* disable channel */ g->ops.fifo.disable_channel(ch); /* preempt the channel */ g->ops.fifo.preempt_channel(g, chid); /* wait for channel update notifiers */ if (ch->update_fn && work_pending(&ch->update_fn_work)) flush_work(&ch->update_fn_work); channels_in_use = true; gk20a_channel_put(ch); } } if (channels_in_use) { g->ops.fifo.update_runlist(g, 0, ~0, false, true); for (chid = 0; chid < f->num_channels; chid++) { if (gk20a_channel_get(&f->channel[chid])) { g->ops.fifo.unbind_channel(&f->channel[chid]); gk20a_channel_put(&f->channel[chid]); } } } gk20a_dbg_fn("done"); return 0; } int gk20a_channel_resume(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; u32 chid; bool channels_in_use = false; gk20a_dbg_fn(""); for (chid = 0; chid < f->num_channels; chid++) { if (gk20a_channel_get(&f->channel[chid])) { gk20a_dbg_info("resume channel %d", chid); g->ops.fifo.bind_channel(&f->channel[chid]); channels_in_use = true; gk20a_channel_put(&f->channel[chid]); } } if (channels_in_use) g->ops.fifo.update_runlist(g, 0, ~0, true, true); gk20a_dbg_fn("done"); return 0; } void gk20a_channel_semaphore_wakeup(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; u32 chid; gk20a_dbg_fn(""); for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *c = g->fifo.channel+chid; if (gk20a_channel_get(c)) { gk20a_channel_event(c); wake_up_interruptible_all(&c->semaphore_wq); gk20a_channel_update(c, 0); gk20a_channel_put(c); } } } static int gk20a_ioctl_channel_submit_gpfifo( struct channel_gk20a *ch, struct nvgpu_submit_gpfifo_args *args) { struct gk20a_fence *fence_out; void *gpfifo = NULL; u32 size; int ret = 0; gk20a_dbg_fn(""); if (ch->has_timedout) return -ETIMEDOUT; /* zero-sized submits are allowed, since they can be used for * synchronization; we might still wait and do an increment */ size = args->num_entries * sizeof(struct nvgpu_gpfifo); if (size) { gpfifo = nvgpu_alloc(size, false); if (!gpfifo) return -ENOMEM; if (copy_from_user(gpfifo, (void __user *)(uintptr_t)args->gpfifo, size)) { ret = -EINVAL; goto clean_up; } } ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries, args->flags, 
static int gk20a_ioctl_channel_submit_gpfifo(
        struct channel_gk20a *ch,
        struct nvgpu_submit_gpfifo_args *args)
{
        struct gk20a_fence *fence_out;
        void *gpfifo = NULL;
        u32 size;
        int ret = 0;

        gk20a_dbg_fn("");

        if (ch->has_timedout)
                return -ETIMEDOUT;

        /* zero-sized submits are allowed, since they can be used for
         * synchronization; we might still wait and do an increment */
        size = args->num_entries * sizeof(struct nvgpu_gpfifo);
        if (size) {
                gpfifo = nvgpu_alloc(size, false);
                if (!gpfifo)
                        return -ENOMEM;

                if (copy_from_user(gpfifo,
                                   (void __user *)(uintptr_t)args->gpfifo,
                                   size)) {
                        ret = -EINVAL;
                        goto clean_up;
                }
        }

        ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
                                          args->flags, &args->fence,
                                          &fence_out);

        if (ret)
                goto clean_up;

        /* Convert fence_out to something we can pass back to user space. */
        if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
                if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
                        int fd = gk20a_fence_install_fd(fence_out);
                        if (fd < 0)
                                ret = fd;
                        else
                                args->fence.id = fd;
                } else {
                        args->fence.id = fence_out->syncpt_id;
                        args->fence.value = fence_out->syncpt_value;
                }
        }
        gk20a_fence_put(fence_out);

clean_up:
        nvgpu_free(gpfifo);
        return ret;
}

void gk20a_init_channel(struct gpu_ops *gops)
{
        gops->fifo.bind_channel = channel_gk20a_bind;
        gops->fifo.unbind_channel = channel_gk20a_unbind;
        gops->fifo.disable_channel = channel_gk20a_disable;
        gops->fifo.enable_channel = channel_gk20a_enable;
        gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
        gops->fifo.free_inst = channel_gk20a_free_inst;
        gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
}

long gk20a_channel_ioctl(struct file *filp,
        unsigned int cmd, unsigned long arg)
{
        struct channel_gk20a *ch = filp->private_data;
        struct platform_device *dev = ch->g->dev;
        u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
        int err = 0;

        gk20a_dbg_fn("start %d", _IOC_NR(cmd));

        if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
                (_IOC_NR(cmd) == 0) ||
                (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
                (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
                return -EINVAL;

        if (_IOC_DIR(cmd) & _IOC_WRITE) {
                if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
                        return -EFAULT;
        }

        /* take a ref or return timeout if channel refs can't be taken */
        ch = gk20a_channel_get(ch);
        if (!ch)
                return -ETIMEDOUT;

        /* protect our sanity for threaded userspace - most of the channel is
         * not thread safe */
        mutex_lock(&ch->ioctl_lock);

        /* this ioctl call keeps a ref to the file which keeps a ref to the
         * channel */

        switch (cmd) {
        case NVGPU_IOCTL_CHANNEL_OPEN:
                err = gk20a_channel_open_ioctl(ch->g,
                        (struct nvgpu_channel_open_args *)buf);
                break;
        case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
                break;
        case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = ch->g->ops.gr.alloc_obj_ctx(ch,
                                (struct nvgpu_alloc_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = ch->g->ops.gr.free_obj_ctx(ch,
                                (struct nvgpu_free_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_alloc_channel_gpfifo(ch,
                                (struct nvgpu_alloc_gpfifo_args *)buf);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
                err = gk20a_ioctl_channel_submit_gpfifo(ch,
                                (struct nvgpu_submit_gpfifo_args *)buf);
                break;
        case NVGPU_IOCTL_CHANNEL_WAIT:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }

                /* waiting is thread-safe, not dropping this mutex could
                 * deadlock in certain conditions */
                mutex_unlock(&ch->ioctl_lock);

                err = gk20a_channel_wait(ch,
                                (struct nvgpu_wait_args *)buf);

                mutex_lock(&ch->ioctl_lock);

                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_channel_zcull_bind(ch,
                                (struct nvgpu_zcull_bind_args *)buf);
                gk20a_idle(dev);
                break;
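        /*
         * The hardware-touching cases below follow the same pattern as the
         * ones above: power the GPU on via gk20a_busy() before touching
         * channel or CCSR state, and release it with gk20a_idle() on the way
         * out of the case.
         */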
        case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_init_error_notifier(ch,
                                (struct nvgpu_set_error_notifier *)buf);
                gk20a_idle(dev);
                break;
#ifdef CONFIG_GK20A_CYCLE_STATS
        case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_channel_cycle_stats(ch,
                                (struct nvgpu_cycle_stats_args *)buf);
                gk20a_idle(dev);
                break;
#endif
        case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
        {
                u32 timeout =
                        (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
                gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
                           timeout, ch->hw_chid);
                ch->timeout_ms_max = timeout;
                break;
        }
        case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
        {
                u32 timeout =
                        (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
                bool timeout_debug_dump = !((u32)
                        ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
                        (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
                gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
                           timeout, ch->hw_chid);
                ch->timeout_ms_max = timeout;
                ch->timeout_debug_dump = timeout_debug_dump;
                break;
        }
        case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
                ((struct nvgpu_get_param_args *)buf)->value =
                        ch->has_timedout;
                break;
        case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_channel_set_priority(ch,
                        ((struct nvgpu_set_priority_args *)buf)->priority);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_ENABLE:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                /* enable channel */
                gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
                        gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
                        ccsr_channel_enable_set_true_f());
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_DISABLE:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                /* disable channel */
                gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
                        gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
                        ccsr_channel_enable_clr_true_f());
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_PREEMPT:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_fifo_preempt(ch->g, ch);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_fifo_force_reset_ch(ch, true);
                gk20a_idle(dev);
                break;
        case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
                err = gk20a_channel_events_ctrl(ch,
                           (struct nvgpu_channel_events_ctrl_args *)buf);
                break;
#ifdef CONFIG_GK20A_CYCLE_STATS
        case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
                                "%s: failed to host gk20a for ioctl cmd: 0x%x",
                                __func__, cmd);
                        break;
                }
                err = gk20a_channel_cycle_stats_snapshot(ch,
                                (struct nvgpu_cycle_stats_snapshot_args *)buf);
                gk20a_idle(dev);
                break;
#endif
        default:
                dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
                err = -ENOTTY;
                break;
        }

        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
                /* copy_to_user() returns the number of bytes it could not
                 * copy, not an errno; report a failed copy as -EFAULT rather
                 * than leaking that count to the caller */
                if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
                        err = -EFAULT;
        }

        mutex_unlock(&ch->ioctl_lock);

        gk20a_channel_put(ch);

        gk20a_dbg_fn("end");

        return err;
}
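/*
 * Illustrative sketch (not part of the driver): a simple command going
 * through gk20a_channel_ioctl() above. _IOC_WRITE arguments are copied in
 * before the switch and _IOC_READ arguments are copied back afterwards, so a
 * caller only fills the uapi struct and issues the ioctl. The uapi header
 * name and "ch_fd" are assumptions; the ioctl, struct and priority names come
 * from this file.
 *
 *      #include <sys/ioctl.h>
 *      #include <linux/nvgpu.h>
 *
 *      static int set_channel_priority(int ch_fd)
 *      {
 *              struct nvgpu_set_priority_args args = {
 *                      .priority = NVGPU_PRIORITY_HIGH,
 *              };
 *
 *              // handled by the SET_PRIORITY case, which reprograms the
 *              // runlist timeslice under ch->ioctl_lock
 *              return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SET_PRIORITY, &args);
 *      }
 */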