author		sujeet baranwal <sbaranwal@nvidia.com>		2015-03-02 18:36:22 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>		2015-04-04 21:58:04 -0400
commit		895675e1d5790e2361b22edb50d702f7dd9a8edd
tree		dbe3586cec5351fd2c2eb13d91c258e663d73b08 /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent		cf0085ec231246748b34081d2786c29cedcbd706
gpu: nvgpu: Removal of regops from CUDA driver
The current CUDA drivers have been using regops to directly
access GPU registers from user space through the dbg node.
This is a security hole and needs to be closed. This patch
instead implements similar functionality in the kernel and
provides an ioctl for it.
Bug 200083334
Change-Id: Ic5ff5a215cbabe7a46837bc4e15efcceb0df0367
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Reviewed-on: http://git-master/r/711758
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
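
For illustration, a minimal userspace sketch of the new flow for one of
the added ioctls. This is a hedged sketch, not part of the commit: the
args fields (channel_fd, sms, enable) are taken from the kernel side of
this patch, while the /dev/nvhost-ctrl-gpu node path and the exact uapi
layout are assumptions; the authoritative definitions live in
uapi/linux/nvgpu.h.

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE */

/* Sketch only: struct layout and device path are assumptions drawn
 * from the kernel code in this patch; check uapi/linux/nvgpu.h. */
static int set_sm_debug_mode(int channel_fd, uint64_t sm_mask, int enable)
{
	struct nvgpu_gpu_sm_debug_mode_args args = {
		.channel_fd = channel_fd,
		.sms = sm_mask,
		.enable = enable,
	};
	int err, ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;
	/* One ioctl replaces the raw regop writes the CUDA driver
	 * previously issued through the dbg node. */
	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE, &args);
	close(ctrl_fd);
	return err;
}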
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c	265
1 file changed, 264 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 7b617a03..5df420ff 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -20,9 +20,16 @@
 #include <linux/anon_inodes.h>
 #include <linux/nvgpu.h>
 #include <uapi/linux/nvgpu.h>
+#include <linux/delay.h>
 
 #include "gk20a.h"
+#include "gr_gk20a.h"
 #include "fence_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_proj_gk20a.h"
+
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	return err;
 }
 
+/* Invalidate i-cache for kepler & maxwell */
+static int nvgpu_gpu_ioctl_inval_icache(
+	struct gk20a *g,
+	struct nvgpu_gpu_inval_icache_args *args)
+{
+
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	/* Take the global lock, since we'll be doing global regops */
+	mutex_lock(&g->dbg_sessions_lock);
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+
+	regval = ops.value_lo;
+
+	if (!err) {
+		ops.op = REGOP(WRITE_32);
+		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	}
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		goto end;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
+	struct gk20a *g,
+	struct nvgpu_gpu_mmu_debug_mode_args *args)
+{
+	int err = 0;
+	u32 mmu_debug_ctrl, reg_val;
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	if (args->state == 1) {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
+		g->mmu_debug_ctrl = true;
+	} else {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
+		g->mmu_debug_ctrl = false;
+	}
+
+	reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	reg_val = set_field(reg_val, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	gk20a_idle(g->dev);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_debug_mode(
+	struct gk20a *g,
+	struct nvgpu_gpu_sm_debug_mode_args *args)
+{
+	int gpc, tpc, err = 0;
+	u32 sm_id, sm_dbgr_ctrl0;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+	u32 tpc_offset, gpc_offset, reg_offset;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
+		if (args->sms & (1 << sm_id)) {
+			gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			gpc_offset = proj_gpc_stride_v() * gpc;
+			reg_offset = tpc_offset + gpc_offset;
+
+			ops.op = REGOP(READ_32);
+			ops.type = REGOP(TYPE_GR_CTX);
+			ops.status = REGOP(STATUS_SUCCESS);
+			ops.value_hi = 0;
+			ops.and_n_mask_lo = 0;
+			ops.and_n_mask_hi = 0;
+			ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
+
+			err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+			sm_dbgr_ctrl0 = ops.value_lo;
+
+			if (args->enable) {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
+					sm_dbgr_ctrl0;
+			} else
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() | sm_dbgr_ctrl0;
+
+			if (!err) {
+				ops.op = REGOP(WRITE_32);
+				ops.value_lo = sm_dbgr_ctrl0;
+				err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+			} else
+				gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		}
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_wait_for_pause(
+	struct gk20a *g,
+	struct nvgpu_gpu_wait_pause_args *args)
+{
+	int err = 0, gpc, tpc;
+	u32 sm_count, sm_id, size;
+	struct warpstate *w_state;
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	size = sm_count * sizeof(struct warpstate);
+	w_state = kzalloc(size, GFP_KERNEL);
+
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* Wait until all valid warps on the sm are paused. The valid warp mask
+		 * must be re-read with the paused mask because new warps may become
+		 * valid as the sm is pausing.
+		 */
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			goto end;
+		}
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
+
+		w_state[sm_id].valid_warps = warps_valid;
+		w_state[sm_id].trapped_warps = warps_trapped;
+		w_state[sm_id].paused_warps = warps_paused;
+	}
+
+	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
+		gk20a_dbg_fn("copy_to_user failed!");
+		err = -EFAULT;
+	}
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	kfree(w_state);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_has_any_exception(
+	struct gk20a *g,
+	struct nvgpu_gpu_tpc_exception_en_status_args *args)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	args->tpc_exception_en_sm_mask = tpc_exception_en;
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
 			(struct nvgpu_gpu_l2_fb_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
+		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
+			(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
+		err = nvgpu_gpu_ioctl_wait_for_pause(g,
+			(struct nvgpu_gpu_wait_pause_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
+		err = nvgpu_gpu_ioctl_has_any_exception(g,
+			(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 
 	return err;
 }
-
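
The wait-for-pause path copies one warpstate record per SM back through
the user pointer in args->pwarpstate, so the caller sizes and owns that
buffer. A hedged sketch of the userspace side, assuming the uapi mirrors
the kernel's struct warpstate (valid_warps, trapped_warps, paused_warps),
that pwarpstate is a u64-sized pointer field, and that the SM count is
obtained the same way the kernel computes it (gpc_count * tpc_count):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE */

/* Assumed mirror of the kernel's struct warpstate; the real uapi
 * definition lives in uapi/linux/nvgpu.h. */
struct warpstate_snap {
	uint64_t valid_warps;
	uint64_t trapped_warps;
	uint64_t paused_warps;
};

static int wait_for_pause(int ctrl_fd, unsigned int sm_count,
			  struct warpstate_snap **out)
{
	struct nvgpu_gpu_wait_pause_args args = { 0 };
	/* The kernel writes sm_count entries to this buffer. */
	struct warpstate_snap *w = calloc(sm_count, sizeof(*w));
	int err;

	if (!w)
		return -1;
	args.pwarpstate = (uintptr_t)w;
	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE, &args);
	if (err) {
		free(w);
		return err;
	}
	*out = w;
	return 0;
}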