/* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gv11b/fifo_gv11b.h" #include "platform_gk20a.h" #include "ioctl_tsg.h" #include "ioctl_channel.h" #include "os_linux.h" struct tsg_private { struct gk20a *g; struct tsg_gk20a *tsg; }; static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) { struct channel_gk20a *ch; int err; ch = gk20a_get_channel_from_file(ch_fd); if (!ch) return -EINVAL; err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); gk20a_channel_put(ch); return err; } static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) { struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; struct channel_gk20a *ch; struct gr_gk20a *gr = &g->gr; int err = 0; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto mutex_release; } err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to power on gpu"); goto mutex_release; } ch = gk20a_get_channel_from_file(arg->channel_fd); if (!ch) { err = -EINVAL; goto idle; } if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { if ((arg->num_active_tpcs > gr->max_tpc_count) || !(arg->num_active_tpcs)) { nvgpu_err(g, "Invalid num of active TPCs"); err = -EINVAL; goto ch_put; } tsg->tpc_num_initialized = true; tsg->num_active_tpcs = arg->num_active_tpcs; tsg->tpc_pg_enabled = true; } else { tsg->tpc_pg_enabled = false; nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); } if (arg->subcontext_id < g->fifo.max_subctx_count) { ch->subctx_id = arg->subcontext_id; } else { err = -EINVAL; goto ch_put; } nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", ch->chid, ch->subctx_id); /* Use runqueue selector 1 for all ASYNC ids */ if (ch->subctx_id > CHANNEL_INFO_VEID0) ch->runqueue_sel = 1; err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); ch_put: gk20a_channel_put(ch); idle: gk20a_idle(g); mutex_release: nvgpu_mutex_release(&sched->control_lock); return err; } static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) { struct channel_gk20a *ch; int err = 0; ch = gk20a_get_channel_from_file(ch_fd); if (!ch) return -EINVAL; if (ch->tsgid != tsg->tsgid) { err = -EINVAL; goto out; } err = gk20a_tsg_unbind_channel(ch, false); if (err == -EAGAIN) { goto out; } /* * Mark the channel timedout since channel unbound from TSG * has no context of its own so it can't serve any job */ gk20a_channel_set_timedout(ch); out: gk20a_channel_put(ch); return err; } static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, unsigned int event_id, struct gk20a_event_id_data **event_id_data) { struct gk20a_event_id_data *local_event_id_data; bool event_found = false; nvgpu_mutex_acquire(&tsg->event_id_list_lock); nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, gk20a_event_id_data, event_id_node) { if (local_event_id_data->event_id == event_id) { event_found = true; break; } } nvgpu_mutex_release(&tsg->event_id_list_lock); if (event_found) { *event_id_data = local_event_id_data; return 0; } else { return -1; } } /* * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs */ static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) { switch (event_id) { case NVGPU_EVENT_ID_BPT_INT: return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; case NVGPU_EVENT_ID_BPT_PAUSE: return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; case NVGPU_EVENT_ID_BLOCKING_SYNC: return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; } return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; } void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, int __event_id) { struct gk20a_event_id_data *event_id_data; u32 event_id; int err = 0; struct gk20a *g = tsg->g; event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) return; err = gk20a_tsg_get_event_data_from_id(tsg, event_id, &event_id_data); if (err) return; nvgpu_mutex_acquire(&event_id_data->lock); nvgpu_log_info(g, "posting event for event_id=%d on tsg=%d\n", event_id, tsg->tsgid); event_id_data->event_posted = true; nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); nvgpu_mutex_release(&event_id_data->lock); } static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) { unsigned int mask = 0; struct gk20a_event_id_data *event_id_data = filep->private_data; struct gk20a *g = event_id_data->g; u32 event_id = event_id_data->event_id; struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); poll_wait(filep, &event_id_data->event_id_wq.wq, wait); nvgpu_mutex_acquire(&event_id_data->lock); if (event_id_data->event_posted) { nvgpu_log_info(g, "found pending event_id=%d on TSG=%d\n", event_id, tsg->tsgid); mask = (POLLPRI | POLLIN); event_id_data->event_posted = false; } nvgpu_mutex_release(&event_id_data->lock); return mask; } static int gk20a_event_id_release(struct inode *inode, struct file *filp) { struct gk20a_event_id_data *event_id_data = filp->private_data; struct gk20a *g = event_id_data->g; struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; nvgpu_mutex_acquire(&tsg->event_id_list_lock); nvgpu_list_del(&event_id_data->event_id_node); nvgpu_mutex_release(&tsg->event_id_list_lock); nvgpu_mutex_destroy(&event_id_data->lock); gk20a_put(g); nvgpu_kfree(g, event_id_data); filp->private_data = NULL; return 0; } const struct file_operations gk20a_event_id_ops = { .owner = THIS_MODULE, .poll = gk20a_event_id_poll, .release = gk20a_event_id_release, }; static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, int event_id, int *fd) { int err = 0; int local_fd; struct file *file; char name[64]; struct gk20a_event_id_data *event_id_data; struct gk20a *g; g = gk20a_get(tsg->g); if (!g) return -ENODEV; err = gk20a_tsg_get_event_data_from_id(tsg, event_id, &event_id_data); if (err == 0) { /* We already have event enabled */ err = -EINVAL; goto free_ref; } err = get_unused_fd_flags(O_RDWR); if (err < 0) goto free_ref; local_fd = err; snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", event_id, local_fd); file = anon_inode_getfile(name, &gk20a_event_id_ops, NULL, O_RDWR); if (IS_ERR(file)) { err = PTR_ERR(file); goto clean_up; } event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); if (!event_id_data) { err = -ENOMEM; goto clean_up_file; } event_id_data->g = g; event_id_data->id = tsg->tsgid; event_id_data->event_id = event_id; nvgpu_cond_init(&event_id_data->event_id_wq); err = nvgpu_mutex_init(&event_id_data->lock); if (err) goto clean_up_free; nvgpu_init_list_node(&event_id_data->event_id_node); nvgpu_mutex_acquire(&tsg->event_id_list_lock); nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); nvgpu_mutex_release(&tsg->event_id_list_lock); fd_install(local_fd, file); file->private_data = event_id_data; *fd = local_fd; return 0; clean_up_free: nvgpu_kfree(g, event_id_data); clean_up_file: fput(file); clean_up: put_unused_fd(local_fd); free_ref: gk20a_put(g); return err; } static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_event_id_ctrl_args *args) { int err = 0; int fd = -1; if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) return -EINVAL; nvgpu_speculation_barrier(); switch (args->cmd) { case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); if (!err) args->event_fd = fd; break; default: nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", args->cmd); err = -EINVAL; break; } return err; } int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) { struct tsg_private *priv; struct tsg_gk20a *tsg; struct device *dev; int err; g = gk20a_get(g); if (!g) return -ENODEV; dev = dev_from_gk20a(g); nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); priv = nvgpu_kmalloc(g, sizeof(*priv)); if (!priv) { err = -ENOMEM; goto free_ref; } err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to power on, %d", err); goto free_mem; } tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); gk20a_idle(g); if (!tsg) { err = -ENOMEM; goto free_mem; } priv->g = g; priv->tsg = tsg; filp->private_data = priv; gk20a_sched_ctrl_tsg_added(g, tsg); return 0; free_mem: nvgpu_kfree(g, priv); free_ref: gk20a_put(g); return err; } int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) { struct nvgpu_os_linux *l; struct gk20a *g; int ret; l = container_of(inode->i_cdev, struct nvgpu_os_linux, tsg.cdev); g = &l->g; nvgpu_log_fn(g, " "); ret = gk20a_busy(g); if (ret) { nvgpu_err(g, "failed to power on, %d", ret); return ret; } ret = nvgpu_ioctl_tsg_open(&l->g, filp); gk20a_idle(g); nvgpu_log_fn(g, "done"); return ret; } void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) { struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); struct gk20a *g = tsg->g; gk20a_sched_ctrl_tsg_removed(g, tsg); gk20a_tsg_release(ref); gk20a_put(g); } int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) { struct tsg_private *priv = filp->private_data; struct tsg_gk20a *tsg; if (!priv) { /* open failed, never got a tsg for this file */ return 0; } tsg = priv->tsg; nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); nvgpu_kfree(tsg->g, priv); return 0; } static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) { struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; u32 level = arg->level; int err; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto done; } err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to power on gpu"); goto done; } level = nvgpu_get_common_runlist_level(level); err = gk20a_tsg_set_runlist_interleave(tsg, level); gk20a_idle(g); done: nvgpu_mutex_release(&sched->control_lock); return err; } static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) { struct nvgpu_sched_ctrl *sched = &g->sched_ctrl; int err; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto done; } err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to power on gpu"); goto done; } err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); gk20a_idle(g); done: nvgpu_mutex_release(&sched->control_lock); return err; } static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) { arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); return 0; } static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_tsg_read_single_sm_error_state_args *args) { struct gr_gk20a *gr = &g->gr; struct nvgpu_tsg_sm_error_state *sm_error_state; struct nvgpu_tsg_sm_error_state_record sm_error_state_record; u32 sm_id; int err = 0; sm_id = args->sm_id; if (sm_id >= gr->no_of_sm) return -EINVAL; nvgpu_speculation_barrier(); sm_error_state = tsg->sm_error_states + sm_id; sm_error_state_record.global_esr = sm_error_state->hww_global_esr; sm_error_state_record.warp_esr = sm_error_state->hww_warp_esr; sm_error_state_record.warp_esr_pc = sm_error_state->hww_warp_esr_pc; sm_error_state_record.global_esr_report_mask = sm_error_state->hww_global_esr_report_mask; sm_error_state_record.warp_esr_report_mask = sm_error_state->hww_warp_esr_report_mask; if (args->record_size > 0) { size_t write_size = sizeof(*sm_error_state); nvgpu_speculation_barrier(); if (write_size > args->record_size) write_size = args->record_size; nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = copy_to_user((void __user *)(uintptr_t) args->record_mem, &sm_error_state_record, write_size); nvgpu_mutex_release(&g->dbg_sessions_lock); if (err) { nvgpu_err(g, "copy_to_user failed!"); return err; } args->record_size = write_size; } return 0; } long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct tsg_private *priv = filp->private_data; struct tsg_gk20a *tsg = priv->tsg; struct gk20a *g = tsg->g; u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; int err = 0; nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) || (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) return -EINVAL; memset(buf, 0, sizeof(buf)); if (_IOC_DIR(cmd) & _IOC_WRITE) { if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) return -EFAULT; } if (!g->sw_ready) { err = gk20a_busy(g); if (err) return err; gk20a_idle(g); } switch (cmd) { case NVGPU_TSG_IOCTL_BIND_CHANNEL: { int ch_fd = *(int *)buf; if (ch_fd < 0) { err = -EINVAL; break; } err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); break; } case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: { err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, (struct nvgpu_tsg_bind_channel_ex_args *)buf); break; } case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: { int ch_fd = *(int *)buf; if (ch_fd < 0) { err = -EINVAL; break; } err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to host gk20a for ioctl cmd: 0x%x", cmd); break; } err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); gk20a_idle(g); break; } case NVGPU_IOCTL_TSG_ENABLE: { err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to host gk20a for ioctl cmd: 0x%x", cmd); return err; } g->ops.fifo.enable_tsg(tsg); gk20a_idle(g); break; } case NVGPU_IOCTL_TSG_DISABLE: { err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to host gk20a for ioctl cmd: 0x%x", cmd); return err; } g->ops.fifo.disable_tsg(tsg); gk20a_idle(g); break; } case NVGPU_IOCTL_TSG_PREEMPT: { err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to host gk20a for ioctl cmd: 0x%x", cmd); return err; } /* preempt TSG */ err = g->ops.fifo.preempt_tsg(g, tsg); gk20a_idle(g); break; } case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: { err = gk20a_tsg_event_id_ctrl(g, tsg, (struct nvgpu_event_id_ctrl_args *)buf); break; } case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, (struct nvgpu_runlist_interleave_args *)buf); break; case NVGPU_IOCTL_TSG_SET_TIMESLICE: { err = gk20a_tsg_ioctl_set_timeslice(g, tsg, (struct nvgpu_timeslice_args *)buf); break; } case NVGPU_IOCTL_TSG_GET_TIMESLICE: { err = gk20a_tsg_ioctl_get_timeslice(g, tsg, (struct nvgpu_timeslice_args *)buf); break; } case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE: { err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg, (struct nvgpu_tsg_read_single_sm_error_state_args *)buf); break; } default: nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; break; } if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); return err; }