summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2016-03-09 04:21:43 -0500
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-04-19 11:07:03 -0400
commit04e45bc943e9703c26f229dfbe558d94418acbe1 (patch)
tree541c62a32055255e82cc953b79b50c8925903f12 /drivers
parent9cf7e23f57d8669d99886a3c82d4997b94c35df8 (diff)
gpu: nvgpu: support storing/reading single SM error state
Add support to store the error state of a single SM before preprocessing the SM exception. The error state is stored as: struct nvgpu_dbg_gpu_sm_error_state_record { u32 hww_global_esr; u32 hww_warp_esr; u64 hww_warp_esr_pc; u32 hww_global_esr_report_mask; u32 hww_warp_esr_report_mask; }. Note that we can safely append new fields to the above structure in the future if required. Also, add IOCTL NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE to support reading an SM's error state from user space. Bug 200156699 Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/1120329 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c44
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c45
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c30
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h8
7 files changed, 134 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 95957788..d9c96417 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
525 return 0; 525 return 0;
526} 526}
527 527
528static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
529 struct dbg_session_gk20a *dbg_s,
530 struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
531{
532 struct gk20a *g = get_gk20a(dbg_s->dev);
533 struct gr_gk20a *gr = &g->gr;
534 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state;
535 u32 sm_id;
536 int err = 0;
537
538 sm_id = args->sm_id;
539 if (sm_id >= gr->no_of_sm)
540 return -EINVAL;
541
542 sm_error_state = gr->sm_error_states + sm_id;
543
544 if (args->sm_error_state_record_size > 0) {
545 size_t write_size = sizeof(*sm_error_state);
546
547 if (write_size > args->sm_error_state_record_size)
548 write_size = args->sm_error_state_record_size;
549
550 mutex_lock(&g->dbg_sessions_lock);
551 err = copy_to_user((void __user *)(uintptr_t)
552 args->sm_error_state_record_mem,
553 sm_error_state,
554 write_size);
555 mutex_unlock(&g->dbg_sessions_lock);
556 if (err) {
557 gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
558 return err;
559 }
560
561 args->sm_error_state_record_size = write_size;
562 }
563
564 return 0;
565}
566
528long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, 567long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
529 unsigned long arg) 568 unsigned long arg)
530{ 569{
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
622 (struct nvgpu_dbg_gpu_timeout_args *)buf); 661 (struct nvgpu_dbg_gpu_timeout_args *)buf);
623 break; 662 break;
624 663
664 case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
665 err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
666 (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
667 break;
668
625 default: 669 default:
626 gk20a_err(dev_from_gk20a(g), 670 gk20a_err(dev_from_gk20a(g),
627 "unrecognized dbg gpu ioctl cmd: 0x%x", 671 "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 61e8e641..c70217ea 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -240,6 +240,8 @@ struct gpu_ops {
240 bool *post_event); 240 bool *post_event);
241 void (*create_gr_sysfs)(struct device *dev); 241 void (*create_gr_sysfs)(struct device *dev);
242 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); 242 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
243 int (*record_sm_error_state)(struct gk20a *g,
244 u32 gpc, u32 tpc);
243 } gr; 245 } gr;
244 const char *name; 246 const char *name;
245 struct { 247 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 734552a1..c0a25e68 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -58,6 +58,7 @@
58#include "semaphore_gk20a.h" 58#include "semaphore_gk20a.h"
59#include "platform_gk20a.h" 59#include "platform_gk20a.h"
60#include "ctxsw_trace_gk20a.h" 60#include "ctxsw_trace_gk20a.h"
61#include "hw_proj_gk20a.h"
61 62
62#define BLK_SIZE (256) 63#define BLK_SIZE (256)
63#define NV_PMM_FBP_STRIDE 0x1000 64#define NV_PMM_FBP_STRIDE 0x1000
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3129 3130
3130 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); 3131 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
3131 3132
3133 kfree(gr->sm_error_states);
3132 kfree(gr->gpc_tpc_count); 3134 kfree(gr->gpc_tpc_count);
3133 kfree(gr->gpc_zcb_count); 3135 kfree(gr->gpc_zcb_count);
3134 kfree(gr->gpc_ppc_count); 3136 kfree(gr->gpc_ppc_count);
@@ -4426,6 +4428,19 @@ restore_fe_go_idle:
4426 if (err) 4428 if (err)
4427 goto out; 4429 goto out;
4428 4430
4431 kfree(gr->sm_error_states);
4432
4433 /* we need to allocate this after g->ops.gr.init_fs_state() since
4434 * we initialize gr->no_of_sm in this function
4435 */
4436 gr->sm_error_states = kzalloc(
4437 sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
4438 * gr->no_of_sm, GFP_KERNEL);
4439 if (!gr->sm_error_states) {
4440 err = -ENOMEM;
4441 goto restore_fe_go_idle;
4442 }
4443
4429out: 4444out:
4430 gk20a_dbg_fn("done"); 4445 gk20a_dbg_fn("done");
4431 return 0; 4446 return 0;
@@ -5494,6 +5509,32 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr)
5494 return hww_warp_esr; 5509 return hww_warp_esr;
5495} 5510}
5496 5511
5512static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
5513{
5514 int sm_id;
5515 struct gr_gk20a *gr = &g->gr;
5516 u32 offset = proj_gpc_stride_v() * gpc +
5517 proj_tpc_in_gpc_stride_v() * tpc;
5518
5519 mutex_lock(&g->dbg_sessions_lock);
5520
5521 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
5522 gr_gpc0_tpc0_sm_cfg_r() + offset));
5523
5524 gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
5525 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
5526 gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
5527 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
5528 gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
5529 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
5530 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
5531 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
5532
5533 mutex_unlock(&g->dbg_sessions_lock);
5534
5535 return 0;
5536}
5537
5497int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, 5538int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
5498 bool *post_event, struct channel_gk20a *fault_ch) 5539 bool *post_event, struct channel_gk20a *fault_ch)
5499{ 5540{
@@ -5554,6 +5595,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
5554 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 5595 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5555 "sm hww global %08x warp %08x", global_esr, warp_esr); 5596 "sm hww global %08x warp %08x", global_esr, warp_esr);
5556 5597
5598 gr_gk20a_elpg_protected_call(g,
5599 g->ops.gr.record_sm_error_state(g, gpc, tpc));
5600
5557 if (g->ops.gr.pre_process_sm_exception) { 5601 if (g->ops.gr.pre_process_sm_exception) {
5558 ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, 5602 ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
5559 global_esr, warp_esr, 5603 global_esr, warp_esr,
@@ -8370,4 +8414,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
8370 gops->gr.get_lrf_tex_ltc_dram_override = NULL; 8414 gops->gr.get_lrf_tex_ltc_dram_override = NULL;
8371 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; 8415 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
8372 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; 8416 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
8417 gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
8373} 8418}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index c82cf75c..22ff1351 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -329,6 +329,7 @@ struct gr_gk20a {
329 u32 fbp_en_mask; 329 u32 fbp_en_mask;
330 u32 no_of_sm; 330 u32 no_of_sm;
331 struct sm_info *sm_to_cluster; 331 struct sm_info *sm_to_cluster;
332 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
332#if defined(CONFIG_GK20A_CYCLE_STATS) 333#if defined(CONFIG_GK20A_CYCLE_STATS)
333 struct mutex cs_lock; 334 struct mutex cs_lock;
334 struct gk20a_cs_snapshot *cs_data; 335 struct gk20a_cs_snapshot *cs_data;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 48aa1524..ab2a975b 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
2122{ 2122{
2123 return (v & 0xffff) << 0; 2123 return (v & 0xffff) << 0;
2124} 2124}
2125static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
2126{
2127 return (r >> 0) & 0xffff;
2128}
2125static inline u32 gr_gpc0_tpc0_sm_arch_r(void) 2129static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
2126{ 2130{
2127 return 0x0050469c; 2131 return 0x0050469c;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b49f2301..eeb70d76 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -31,6 +31,7 @@
31#include "hw_fuse_gm20b.h" 31#include "hw_fuse_gm20b.h"
32#include "pmu_gm20b.h" 32#include "pmu_gm20b.h"
33#include "acr_gm20b.h" 33#include "acr_gm20b.h"
34#include "hw_proj_gm20b.h"
34 35
35static void gr_gm20b_init_gpc_mmu(struct gk20a *g) 36static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
36{ 37{
@@ -1190,6 +1191,34 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
1190 *num_entries = ARRAY_SIZE(wl_addr_gm20b); 1191 *num_entries = ARRAY_SIZE(wl_addr_gm20b);
1191} 1192}
1192 1193
1194static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1195{
1196 int sm_id;
1197 struct gr_gk20a *gr = &g->gr;
1198 u32 offset = proj_gpc_stride_v() * gpc +
1199 proj_tpc_in_gpc_stride_v() * tpc;
1200
1201 mutex_lock(&g->dbg_sessions_lock);
1202
1203 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
1204 gr_gpc0_tpc0_sm_cfg_r() + offset));
1205
1206 gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
1207 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1208 gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
1209 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
1210 gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
1211 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
1212 gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
1213 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset);
1214 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
1215 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
1216
1217 mutex_unlock(&g->dbg_sessions_lock);
1218
1219 return 0;
1220}
1221
1193void gm20b_init_gr(struct gpu_ops *gops) 1222void gm20b_init_gr(struct gpu_ops *gops)
1194{ 1223{
1195 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1224 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1256,4 +1285,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1256 gops->gr.get_lrf_tex_ltc_dram_override = NULL; 1285 gops->gr.get_lrf_tex_ltc_dram_override = NULL;
1257 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; 1286 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
1258 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; 1287 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
1288 gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
1259} 1289}
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index dbe54860..b796e2d3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
2130{ 2130{
2131 return (v & 0xffff) << 0; 2131 return (v & 0xffff) << 0;
2132} 2132}
2133static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
2134{
2135 return (r >> 0) & 0xffff;
2136}
2133static inline u32 gr_gpc0_tpc0_sm_arch_r(void) 2137static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
2134{ 2138{
2135 return 0x0050469c; 2139 return 0x0050469c;
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
3270{ 3274{
3271 return 0x0; 3275 return 0x0;
3272} 3276}
3277static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
3278{
3279 return 0x00504654;
3280}
3273static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) 3281static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
3274{ 3282{
3275 return 0x00504770; 3283 return 0x00504770;