author     Arto Merilainen <amerilainen@nvidia.com>   2014-03-19 03:38:25 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>      2015-03-18 15:08:53 -0400
commit     a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch)
tree       cc75f75bcf43db316a002a7a240b81f299bf6d7f /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent     61efaf843c22b85424036ec98015121c08f5f16c (diff)
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.
Bug 1482562
Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c  699
1 file changed, 699 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 00000000..da7d733e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,699 @@
/*
 * Tegra GK20A GPU Debugger/Profiler Driver
 *
 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/nvhost.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"
#include "hw_therm_gk20a.h"

struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
        .exec_reg_ops = exec_regops_gk20a,
};

/* silly allocator - just increment session id */
static atomic_t session_id = ATOMIC_INIT(0);
static int generate_session_id(void)
{
        return atomic_add_return(1, &session_id);
}

static int alloc_session(struct dbg_session_gk20a **_dbg_s)
{
        struct dbg_session_gk20a *dbg_s;
        *_dbg_s = NULL;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
        if (!dbg_s)
                return -ENOMEM;

        dbg_s->id = generate_session_id();
        dbg_s->ops = &dbg_gpu_session_ops_gk20a;
        *_dbg_s = dbg_s;
        return 0;
}

int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
{
        struct dbg_session_gk20a *dbg_session;
        struct gk20a *g;

        struct platform_device *pdev;
        struct device *dev;

        int err;

        if (!is_profiler)
                g = container_of(inode->i_cdev,
                                 struct gk20a, dbg.cdev);
        else
                g = container_of(inode->i_cdev,
                                 struct gk20a, prof.cdev);
        pdev = g->dev;
        dev = &pdev->dev;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));

        err = alloc_session(&dbg_session);
        if (err)
                return err;

        filp->private_data = dbg_session;
        dbg_session->pdev = pdev;
        dbg_session->dev = dev;
        dbg_session->g = g;
        dbg_session->is_profiler = is_profiler;
        dbg_session->is_pg_disabled = false;

        INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
        init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
        dbg_session->dbg_events.events_enabled = false;
        dbg_session->dbg_events.num_pending_events = 0;

        return 0;
}

/* used in scenarios where the debugger session can take just the inter-session
 * lock for performance, but the profiler session must take the per-gpu lock
 * since it might not have an associated channel. */
static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_lock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_lock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_unlock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_unlock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = true;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = false;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
            dbg_s->dbg_events.num_pending_events > 0)
                dbg_s->dbg_events.num_pending_events--;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_events_ctrl_args *args)
{
        int ret = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);

        if (!dbg_s->ch) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "no channel bound to dbg session\n");
                return -EINVAL;
        }

        switch (args->cmd) {
        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
                gk20a_dbg_gpu_events_enable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
                gk20a_dbg_gpu_events_disable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
                gk20a_dbg_gpu_events_clear(dbg_s);
                break;

        default:
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "unrecognized dbg gpu events ctrl cmd: 0x%x",
                          args->cmd);
                ret = -EINVAL;
                break;
        }

        return ret;
}

unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
{
        unsigned int mask = 0;
        struct dbg_session_gk20a *dbg_s = filep->private_data;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
            dbg_s->dbg_events.num_pending_events > 0) {
                gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
                          dbg_s->id);
                gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                          dbg_s->dbg_events.num_pending_events);
                mask = (POLLPRI | POLLIN);
        }

        gk20a_dbg_session_mutex_unlock(dbg_s);

        return mask;
}

int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
}

int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
}

void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
{
        struct dbg_session_gk20a *dbg_s;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* guard against the session list being modified */
        mutex_lock(&ch->dbg_s_lock);

        list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
                if (dbg_s->dbg_events.events_enabled) {
                        gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
                                  dbg_s->id);
                        gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                                  dbg_s->dbg_events.num_pending_events);

                        dbg_s->dbg_events.num_pending_events++;

                        wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
                }
        }

        mutex_unlock(&ch->dbg_s_lock);
}


static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                             __u32 powermode);

static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
{
        struct channel_gk20a *ch_gk20a = dbg_s->ch;
        struct gk20a *g = dbg_s->g;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* wasn't bound to start with ? */
        if (!ch_gk20a) {
                gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
                return -ENODEV;
        }

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch_gk20a->dbg_s_lock);

        --g->dbg_sessions;

        /* Powergate enable is called here as possibility of dbg_session
         * which called powergate disable ioctl, to be killed without calling
         * powergate enable ioctl
         */
        dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);

        dbg_s->ch = NULL;
        fput(dbg_s->ch_f);
        dbg_s->ch_f = NULL;

        list_del_init(&dbg_s->dbg_s_list_node);

        mutex_unlock(&ch_gk20a->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);

        return 0;
}

int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;

        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));

        /* unbind if it was bound */
        if (!dbg_s->ch)
                return 0;
        dbg_unbind_channel_gk20a(dbg_s);

        kfree(dbg_s);
        return 0;
}

static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_bind_channel_args *args)
{
        struct file *f;
        struct gk20a *g;
        struct channel_gk20a *ch;

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
                  dev_name(dbg_s->dev), args->channel_fd);

        if (args->channel_fd == ~0)
                return dbg_unbind_channel_gk20a(dbg_s);

        /* even though get_file_channel is doing this it releases it as well */
        /* by holding it here we'll keep it from disappearing while the
         * debugger is in session */
        f = fget(args->channel_fd);
        if (!f)
                return -ENODEV;

        ch = gk20a_get_channel_from_file(args->channel_fd);
        if (!ch) {
                gk20a_dbg_fn("no channel found for fd");
                fput(f);
                return -EINVAL;
        }

        g = dbg_s->g;
        gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch->dbg_s_lock);

        dbg_s->ch_f = f;
        dbg_s->ch = ch;
        list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);

        g->dbg_sessions++;

        mutex_unlock(&ch->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);
        return 0;
}

static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args);

static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args);

static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);

long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
        int err = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
            (_IOC_NR(cmd) == 0) ||
            (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
                return -EFAULT;

        BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);

        if (_IOC_DIR(cmd) & _IOC_WRITE) {
                if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
                        return -EFAULT;
        }

        if (!g->gr.sw_ready) {
                err = gk20a_busy(g->dev);
                if (err)
                        return err;

                gk20a_idle(g->dev);
        }

        switch (cmd) {
        case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
                err = dbg_bind_channel_gk20a(dbg_s,
                             (struct nvhost_dbg_gpu_bind_channel_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_REG_OPS:
                err = nvhost_ioctl_channel_reg_ops(dbg_s,
                             (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_POWERGATE:
                err = nvhost_ioctl_powergate_gk20a(dbg_s,
                             (struct nvhost_dbg_gpu_powergate_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
                err = gk20a_dbg_gpu_events_ctrl(dbg_s,
                             (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
                break;

        case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
                err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
                             (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                          "unrecognized dbg gpu ioctl cmd: 0x%x",
                          cmd);
                err = -ENOTTY;
                break;
        }

        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
                err = copy_to_user((void __user *)arg,
                                   buf, _IOC_SIZE(cmd));

        return err;
}

/* In order to perform a context relative op the context has
 * to be created already... which would imply that the
 * context switch mechanism has already been put in place.
 * So by the time we perform such an opertation it should always
 * be possible to query for the appropriate context offsets, etc.
 *
 * But note: while the dbg_gpu bind requires the a channel fd,
 * it doesn't require an allocated gr/compute obj at that point...
 */
static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
                                      struct gr_gk20a *gr)
{
        int err;

        mutex_lock(&gr->ctx_mutex);
        err = !gr->ctx_vars.golden_image_initialized;
        mutex_unlock(&gr->ctx_mutex);
        if (err)
                return false;
        return true;

}

static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args)
{
        int err;
        struct device *dev = dbg_s->dev;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct nvhost_dbg_gpu_reg_op *ops;
        u64 ops_size = sizeof(ops[0]) * args->num_ops;

        gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);

        if (!dbg_s->ops) {
                gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
                return -EINVAL;
        }

        if (!dbg_s->is_profiler && !dbg_s->ch) {
                gk20a_err(dev, "bind a channel before regops for a debugging session");
                return -EINVAL;
        }

        /* be sure that ctx info is in place */
        if (!gr_context_info_available(dbg_s, &g->gr)) {
                gk20a_err(dev, "gr context data not available\n");
                return -ENODEV;
        }

        ops = kzalloc(ops_size, GFP_KERNEL);
        if (!ops) {
                gk20a_err(dev, "Allocating memory failed!");
                return -ENOMEM;
        }

        gk20a_dbg_fn("Copying regops from userspace");

        if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
                dev_err(dev, "copy_from_user failed!");
                err = -EFAULT;
                goto clean_up;
        }

        /* since exec_reg_ops sends methods to the ucode, it must take the
         * global gpu lock to protect against mixing methods from debug sessions
         * on other channels */
        mutex_lock(&g->dbg_sessions_lock);

        err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);

        mutex_unlock(&g->dbg_sessions_lock);

        if (err) {
                gk20a_err(dev, "dbg regops failed");
                goto clean_up;
        }

        gk20a_dbg_fn("Copying result to userspace");

        if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
                dev_err(dev, "copy_to_user failed!");
                err = -EFAULT;
                goto clean_up;
        }
        return 0;
clean_up:
        kfree(ops);
        return err;
}

static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                             __u32 powermode)
{
        int err = 0;
        struct gk20a *g = get_gk20a(dbg_s->pdev);

        /* This function must be called with g->dbg_sessions_lock held */

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
                  dev_name(dbg_s->dev), powermode);

        switch (powermode) {
        case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
                /* save off current powergate, clk state.
                 * set gpu module's can_powergate = 0.
                 * set gpu module's clk to max.
                 * while *a* debug session is active there will be no power or
                 * clocking state changes allowed from mainline code (but they
                 * should be saved).
                 */
                /* Allow powergate disable if the current dbg_session doesn't
                 * call a powergate disable ioctl and the global
                 * powergating_disabled_refcount is zero
                 */

                if ((dbg_s->is_pg_disabled == false) &&
                    (g->dbg_powergating_disabled_refcount++ == 0)) {

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
                        gk20a_busy(g->dev);
                        gk20a_channel_busy(dbg_s->pdev);

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        false);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        false);
                        gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);

                        g->elcg_enabled = false;
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);

                        gk20a_pmu_disable_elpg(g);
                }

                dbg_s->is_pg_disabled = true;
                break;

        case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
                /* restore (can) powergate, clk state */
                /* release pending exceptions to fault/be handled as usual */
                /*TBD: ordering of these? */

                /* Re-enabling powergate as no other sessions want
                 * powergate disabled and the current dbg-sessions had
                 * requested the powergate disable through ioctl
                 */
                if (dbg_s->is_pg_disabled &&
                    --g->dbg_powergating_disabled_refcount == 0) {

                        g->elcg_enabled = true;
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
                        gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        g->slcg_enabled);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        g->slcg_enabled);

                        gk20a_pmu_enable_elpg(g);

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
                        gk20a_channel_idle(dbg_s->pdev);
                        gk20a_idle(g->dev);
                }

                dbg_s->is_pg_disabled = false;
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                          "unrecognized dbg gpu powergate mode: 0x%x",
                          powermode);
                err = -ENOTTY;
                break;
        }

        return err;
}

static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        gk20a_dbg_fn("%s powergate mode = %d",
                     dev_name(dbg_s->dev), args->mode);

        mutex_lock(&g->dbg_sessions_lock);
        err = dbg_set_powergate(dbg_s, args->mode);
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}

static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct channel_gk20a *ch_gk20a;

        gk20a_dbg_fn("%s smpc ctxsw mode = %d",
                     dev_name(dbg_s->dev), args->mode);

        /* Take the global lock, since we'll be doing global regops */
        mutex_lock(&g->dbg_sessions_lock);

        ch_gk20a = dbg_s->ch;

        if (!ch_gk20a) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "no bound channel for smpc ctxsw mode update\n");
                err = -EINVAL;
                goto clean_up;
        }

        err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
                      args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
        if (err) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "error (%d) during smpc ctxsw mode update\n", err);
                goto clean_up;
        }
        /* The following regops are a hack/war to make up for the fact that we
         * just scribbled into the ctxsw image w/o really knowing whether
         * it was already swapped out in/out once or not, etc.
         */
        {
                struct nvhost_dbg_gpu_reg_op ops[4];
                int i;
                for (i = 0; i < ARRAY_SIZE(ops); i++) {
                        ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32;
                        ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
                        ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
                        ops[i].value_hi = 0;
                        ops[i].and_n_mask_lo = 0;
                        ops[i].and_n_mask_hi = 0;
                }
                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
                ops[0].offset = 0x00419e08;
                ops[0].value_lo = 0x1d;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
                ops[1].offset = 0x00419e58;
                ops[1].value_lo = 0x1;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
                ops[2].offset = 0x00419e68;
                ops[2].value_lo = 0xaaaa;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
                ops[3].offset = 0x00419f40;
                ops[3].value_lo = 0x18;

                err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
        }

clean_up:
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}
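
For context, here is a minimal userspace sketch of how a tool might drive the debugger session interface this file adds. The ioctl numbers and argument structs come from <linux/nvhost_dbg_gpu_ioctl.h> as included above; the device node path and the placeholder channel fd are assumptions for illustration and are not defined by this patch.

/*
 * Illustrative sketch only. Assumed: the dbg cdev registered by this driver
 * is exposed as /dev/nvhost-dbg-gpu and a GPU channel fd was obtained
 * elsewhere (here a placeholder value is used).
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

int main(void)
{
        int dbg_fd = open("/dev/nvhost-dbg-gpu", O_RDWR); /* assumed node name */
        if (dbg_fd < 0)
                return 1;

        /* Bind an already-open GPU channel fd (placeholder value). */
        struct nvhost_dbg_gpu_bind_channel_args bind = { .channel_fd = 3 };
        if (ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL, &bind))
                perror("bind channel");

        /* Keep the GPU clocked and powered while the session is active. */
        struct nvhost_dbg_gpu_powergate_args pg = {
                .mode = NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE,
        };
        if (ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg))
                perror("powergate disable");

        /* ... NVHOST_DBG_GPU_IOCTL_REG_OPS / EVENTS_CTRL calls go here ... */

        /* Re-enable powergating; release (or unbind) also does this. */
        pg.mode = NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE;
        ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg);

        close(dbg_fd);
        return 0;
}

Note on the design this exercises: dbg_set_powergate() refcounts powergate-disable requests across sessions via dbg_powergating_disabled_refcount, so several concurrent debugger/profiler sessions may each request MODE_DISABLE, and clock/power gating is only restored once the last of them re-enables it or goes away.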