From 895675e1d5790e2361b22edb50d702f7dd9a8edd Mon Sep 17 00:00:00 2001
From: sujeet baranwal
Date: Mon, 2 Mar 2015 15:36:22 -0800
Subject: gpu: nvgpu: Removal of regops from CUDA driver

The CUDA driver has been using regops to directly access GPU
registers from user space through the dbg node. This is a security
hole and needs to be avoided. This patch instead implements the
equivalent functionality in the kernel and exposes it to user space
through new ioctls.

Bug 200083334

Change-Id: Ic5ff5a215cbabe7a46837bc4e15efcceb0df0367
Signed-off-by: sujeet baranwal
Reviewed-on: http://git-master/r/711758
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  | 265 +++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/gk20a.c       |  20 +++
 drivers/gpu/nvgpu/gk20a/gk20a.h       |   1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c    |  13 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h    |   7 +
 drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h |  10 +-
 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h |  72 +++++++++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c    |   5 +
 drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h |   8 +
 drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h |  68 +++++++++
 10 files changed, 465 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 7b617a03..5df420ff 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -20,9 +20,16 @@
 #include
 #include
 #include
+#include
 #include "gk20a.h"
+#include "gr_gk20a.h"
 #include "fence_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_proj_gk20a.h"
+
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	return err;
 }
 
+/* Invalidate i-cache for kepler & maxwell */
+static int nvgpu_gpu_ioctl_inval_icache(
+		struct gk20a *g,
+		struct nvgpu_gpu_inval_icache_args *args)
+{
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	/* Take the global lock, since we'll be doing global regops */
+	mutex_lock(&g->dbg_sessions_lock);
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+
+	regval = ops.value_lo;
+
+	if (!err) {
+		ops.op = REGOP(WRITE_32);
+		ops.value_lo = set_field(regval,
+				gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	}
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		goto end;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl,
+			gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_mmu_debug_mode_args *args)
+{
+	int err = 0;
+	u32 mmu_debug_ctrl, debug_value;
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	if (args->state == 1) {
+		debug_value = fb_mmu_debug_ctrl_debug_enabled_v();
+		g->mmu_debug_ctrl = true;
+	} else {
+		debug_value = fb_mmu_debug_ctrl_debug_disabled_v();
+		g->mmu_debug_ctrl = false;
+	}
+
+	/* Update only the debug field, preserving the rest of the register */
+	mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	mmu_debug_ctrl = set_field(mmu_debug_ctrl,
+			fb_mmu_debug_ctrl_debug_m(), debug_value);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	gk20a_idle(g->dev);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_sm_debug_mode_args *args)
+{
+	int gpc, tpc, err = 0;
+	u32 sm_id, sm_dbgr_ctrl0;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+	u32 tpc_offset, gpc_offset, reg_offset;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
+		if (args->sms & (1 << sm_id)) {
+			gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			gpc_offset = proj_gpc_stride_v() * gpc;
+			reg_offset = tpc_offset + gpc_offset;
+
+			ops.op = REGOP(READ_32);
+			ops.type = REGOP(TYPE_GR_CTX);
+			ops.status = REGOP(STATUS_SUCCESS);
+			ops.value_hi = 0;
+			ops.and_n_mask_lo = 0;
+			ops.and_n_mask_hi = 0;
+			ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
+
+			err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+			sm_dbgr_ctrl0 = ops.value_lo;
+
+			if (args->enable) {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
+					sm_dbgr_ctrl0;
+			} else {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() |
+					sm_dbgr_ctrl0;
+			}
+
+			if (!err) {
+				ops.op = REGOP(WRITE_32);
+				ops.value_lo = sm_dbgr_ctrl0;
+				err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+			} else {
+				gk20a_err(dev_from_gk20a(g),
+						"Failed to access register\n");
+			}
+		}
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_wait_for_pause(
+		struct gk20a *g,
+		struct nvgpu_gpu_wait_pause_args *args)
+{
+	int err = 0, gpc, tpc;
+	u32 sm_count, sm_id, size;
+	struct warpstate *w_state;
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	size = sm_count * sizeof(struct warpstate);
+	w_state = kzalloc(size, GFP_KERNEL);
+	if (!w_state)
+		return -ENOMEM;
+
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* Wait until all valid warps on the sm are paused. The valid warp mask
+		 * must be re-read with the paused mask because new warps may become
+		 * valid as the sm is pausing.
+		 */
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			goto end;
+		}
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
+
+		w_state[sm_id].valid_warps = warps_valid;
+		w_state[sm_id].trapped_warps = warps_trapped;
+		w_state[sm_id].paused_warps = warps_paused;
+	}
+
+	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
+		gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
+		err = -EFAULT;
+	}
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	kfree(w_state);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_has_any_exception(
+		struct gk20a *g,
+		struct nvgpu_gpu_tpc_exception_en_status_args *args)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents the enable state of the corresponding
+		 * SM; bit 0 corresponds to SM0.
+		 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	args->tpc_exception_en_sm_mask = tpc_exception_en;
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
 			(struct nvgpu_gpu_l2_fb_args *)buf);
 		break;
+
+	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
+		err = gr_gk20a_elpg_protected_call(g,
+				nvgpu_gpu_ioctl_inval_icache(g,
+				(struct nvgpu_gpu_inval_icache_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
+		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
+				(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
+		err = gr_gk20a_elpg_protected_call(g,
+				nvgpu_gpu_ioctl_set_debug_mode(g,
+				(struct nvgpu_gpu_sm_debug_mode_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
+		err = nvgpu_gpu_ioctl_wait_for_pause(g,
+				(struct nvgpu_gpu_wait_pause_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
+		err = nvgpu_gpu_ioctl_has_any_exception(g,
+				(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 	return err;
 }
-
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 67aa49c6..3389aca5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -56,6 +56,8 @@
 #include "hw_sim_gk20a.h"
 #include "hw_top_gk20a.h"
 #include "hw_ltc_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
 #include "gk20a_scale.h"
 #include "dbg_gpu_gk20a.h"
 #include "hal.h"
@@ -727,6 +729,21 @@ static int gk20a_detect_chip(struct gk20a *g)
 	return gpu_init_hal(g);
 }
 
+void gk20a_pm_restore_debug_setting(struct gk20a *g)
+{
+	u32 mmu_debug_ctrl, debug_value;
+
+	/* restore mmu debug state */
+	if (g->mmu_debug_ctrl)
+		debug_value = fb_mmu_debug_ctrl_debug_enabled_v();
+	else
+		debug_value = fb_mmu_debug_ctrl_debug_disabled_v();
+
+	mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	mmu_debug_ctrl = set_field(mmu_debug_ctrl,
+			fb_mmu_debug_ctrl_debug_m(), debug_value);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
+}
+
 static int gk20a_pm_finalize_poweron(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -851,6 +868,9 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
 		goto done;
 	}
 
+	/* Restore the debug setting */
+	gk20a_pm_restore_debug_setting(g);
+
 	gk20a_channel_resume(g);
 	set_user_nice(current, nice_value);
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index b9bdc6e6..bcea5655 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -511,6 +511,7 @@ struct gk20a {
 	struct device_dma_parameters dma_parms;
 
 	struct gk20a_cde_app cde_app;
+	bool mmu_debug_ctrl;
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4217658c..7e8d4e13 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1273,7 +1273,6 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 	u32 tpc_index, gpc_index;
 	u32 tpc_offset, gpc_offset;
 	u32 sm_id = 0, gpc_id = 0;
-	u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
 	u32 tpc_per_gpc;
 	u32 max_ways_evict = INVALID_MAX_WAYS;
 	u32 l1c_dbg_reg_val;
@@ -1295,7 +1294,9 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 			gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() +
 					gpc_offset + tpc_offset,
 				gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
 
-			sm_id_to_gpc_id[sm_id] = gpc_index;
+			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
+			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+
 			sm_id++;
 		}
 
@@ -1306,6 +1307,8 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 		}
 	}
 
+	gr->no_of_sm = sm_id;
+
 	for (tpc_index = 0, gpc_id = 0;
 	     tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
 	     tpc_index++, gpc_id += 8) {
@@ -2997,6 +3000,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 	kfree(gr->pes_tpc_count[1]);
 	kfree(gr->pes_tpc_mask[0]);
 	kfree(gr->pes_tpc_mask[1]);
+	kfree(gr->sm_to_cluster);
 	kfree(gr->gpc_skip_mask);
 	kfree(gr->map_tiles);
 	gr->gpc_tpc_count = NULL;
@@ -3089,6 +3093,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+
 	gr->gpc_skip_mask =
 		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
 			GFP_KERNEL);
@@ -3159,6 +3164,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
 	}
 
+	gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->tpc_count *
+			sizeof(struct sm_info), GFP_KERNEL);
+	gr->no_of_sm = 0;
+
gk20a_dbg_info("fbps: %d", gr->num_fbps); gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count); gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index deafc438..6cabe526 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -165,6 +165,11 @@ struct zbc_query_params { u32 index_size; /* [out] size, [in] index */ }; +struct sm_info { + u8 gpc_index; + u8 tpc_index; +}; + struct gr_gk20a { struct gk20a *g; struct { @@ -290,6 +295,8 @@ struct gr_gk20a { #ifdef CONFIG_ARCH_TEGRA_18x_SOC struct gr_t18x t18x; #endif + u32 no_of_sm; + struct sm_info *sm_to_cluster; }; void gk20a_fecs_dump_falcon_stats(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h index 6b8b6718..a0a3ae33 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -194,10 +194,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r) { return (r >> 16) & 0x1; } +static inline u32 fb_mmu_debug_ctrl_debug_m(void) +{ + return 0x1 << 16; +} static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void) { return 0x00000001; } +static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void) +{ + return 0x00000000; +} static inline u32 fb_mmu_vpr_info_r(void) { return 0x00100cd0; diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 8fe75614..8a6c2f23 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h @@ -342,6 +342,30 @@ static inline u32 gr_activity_4_r(void) { return 0x00400390; } +static inline u32 gr_pri_gpc0_gcc_dbg_r(void) +{ + return 0x00501000; +} +static inline u32 gr_pri_gpcs_gcc_dbg_r(void) +{ + return 0x00419000; +} +static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void) +{ + return 0x1 << 1; +} +static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void) +{ + return 0x005046a4; +} +static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void) +{ + return 0x00419ea4; +} +static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void) +{ + return 0x1 << 0; +} static inline u32 gr_pri_sked_activity_r(void) { return 0x00407054; @@ -2962,6 +2986,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void) { return 0x0050450c; } +static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r) +{ + return (r >> 1) & 0x1; +} static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void) { return 0x2; @@ -3010,6 +3038,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void) { return 0x00000001; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void) +{ + return 0x00000000; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void) { return 0x80000000; @@ -3022,10 +3054,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void) { return 0x40000000; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void) +{ + return 0x0; +} +static inline u32 
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
 {
 	return (r >> 4) & 0x1;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c2b00a8..3d99e94d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -533,10 +533,15 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 					+ gpc_offset + tpc_offset,
 				gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
 
+			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
+			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+
 			sm_id++;
 		}
 	}
 
+	gr->no_of_sm = sm_id;
+
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
 		tpc_per_gpc |= gr->gpc_tpc_count[gpc_index]
 			<< (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index);
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index a753074e..214306cb 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -214,10 +214,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
 {
 	return (r >> 16) & 0x1;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_m(void)
+{
+	return 0x1 << 16;
+}
 static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fb_mmu_vpr_info_r(void)
 {
 	return 0x00100cd0;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 868b8fe7..11605deb 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -330,6 +330,30 @@ static inline u32 gr_activity_4_r(void)
 {
 	return 0x00400390;
 }
+static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
+{
+	return 0x00501000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
+{
+	return 0x00419000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
+{
+	return 0x005046a4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
+{
+	return 0x00419ea4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
+{
+	return 0x1 << 0;
+}
 static inline u32 gr_pri_sked_activity_r(void)
 {
 	return 0x00407054;
@@ -2998,6 +3022,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
 }
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
 static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
 {
 	return 0x0041ac94;
@@ -3054,10 +3082,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
 {
 	return (r >> 4) & 0x1;
--
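
For reference, below is a minimal user-space sketch (not part of the patch) of how a
debugger might drive the new ioctls instead of issuing raw regops through the dbg
node. The struct names and field names (channel_fd, sms, enable, pwarpstate,
valid/trapped/paused masks) come from the handlers above; the device-node path,
the <linux/nvgpu.h> uapi location, and the user-visible layout of struct warpstate
are assumptions.

/*
 * Hedged sketch: call the new NVGPU_GPU_IOCTL_* entry points from user space.
 * Assumptions: /dev/nvhost-ctrl-gpu is the gk20a/gm20b ctrl node, and
 * <linux/nvgpu.h> exports the ioctl numbers, the *_args structs, and a
 * struct warpstate with u64 valid_warps/trapped_warps/paused_warps fields.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header for the ioctl args */

static int sm_debug_mode(int ctrl_fd, int channel_fd, uint64_t sms, int enable)
{
	struct nvgpu_gpu_sm_debug_mode_args args = {0};

	args.channel_fd = channel_fd;	/* channel whose GR context is patched */
	args.sms = sms;			/* bit n selects SM n, as in the handler */
	args.enable = enable;

	/* The kernel now performs the SM_DBGR_CONTROL0 regops on our behalf */
	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE, &args);
}

static int wait_for_pause(int ctrl_fd, struct warpstate *w)
{
	struct nvgpu_gpu_wait_pause_args args = {0};

	/* The kernel copies one struct warpstate per SM into this buffer;
	 * it must hold at least gpc_count * tpc_count entries. */
	args.pwarpstate = (uintptr_t)w;

	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE, &args);
}

int main(void)
{
	int ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR); /* assumed node */
	int channel_fd = -1;		/* channel setup elided for brevity */
	struct warpstate w[2] = {0};	/* gk20a/gm20b expose only a few SMs */

	if (ctrl_fd < 0)
		return 1;

	if (sm_debug_mode(ctrl_fd, channel_fd, 0x1 /* SM0 */, 1) == 0 &&
	    wait_for_pause(ctrl_fd, w) == 0)
		printf("SM0 paused warps: 0x%llx\n",
		       (unsigned long long)w[0].paused_warps);

	close(ctrl_fd);
	return 0;
}

The design point the sketch illustrates: the kernel resolves the channel, takes
dbg_sessions_lock, and performs the privileged register accesses itself, so user
space never touches GPU registers directly and the dbg node no longer needs to
expose raw regops.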