diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 44 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 45 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 30 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 29 |
8 files changed, 162 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 95957788..d9c96417 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( | |||
525 | return 0; | 525 | return 0; |
526 | } | 526 | } |
527 | 527 | ||
528 | static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( | ||
529 | struct dbg_session_gk20a *dbg_s, | ||
530 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) | ||
531 | { | ||
532 | struct gk20a *g = get_gk20a(dbg_s->dev); | ||
533 | struct gr_gk20a *gr = &g->gr; | ||
534 | struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state; | ||
535 | u32 sm_id; | ||
536 | int err = 0; | ||
537 | |||
538 | sm_id = args->sm_id; | ||
539 | if (sm_id >= gr->no_of_sm) | ||
540 | return -EINVAL; | ||
541 | |||
542 | sm_error_state = gr->sm_error_states + sm_id; | ||
543 | |||
544 | if (args->sm_error_state_record_size > 0) { | ||
545 | size_t write_size = sizeof(*sm_error_state); | ||
546 | |||
547 | if (write_size > args->sm_error_state_record_size) | ||
548 | write_size = args->sm_error_state_record_size; | ||
549 | |||
550 | mutex_lock(&g->dbg_sessions_lock); | ||
551 | err = copy_to_user((void __user *)(uintptr_t) | ||
552 | args->sm_error_state_record_mem, | ||
553 | sm_error_state, | ||
554 | write_size); | ||
555 | mutex_unlock(&g->dbg_sessions_lock); | ||
556 | if (err) { | ||
557 | gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n"); | ||
558 | return err; | ||
559 | } | ||
560 | |||
561 | args->sm_error_state_record_size = write_size; | ||
562 | } | ||
563 | |||
564 | return 0; | ||
565 | } | ||
566 | |||
528 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | 567 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, |
529 | unsigned long arg) | 568 | unsigned long arg) |
530 | { | 569 | { |
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
622 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | 661 | (struct nvgpu_dbg_gpu_timeout_args *)buf); |
623 | break; | 662 | break; |
624 | 663 | ||
664 | case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
665 | err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, | ||
666 | (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); | ||
667 | break; | ||
668 | |||
625 | default: | 669 | default: |
626 | gk20a_err(dev_from_gk20a(g), | 670 | gk20a_err(dev_from_gk20a(g), |
627 | "unrecognized dbg gpu ioctl cmd: 0x%x", | 671 | "unrecognized dbg gpu ioctl cmd: 0x%x", |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 61e8e641..c70217ea 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -240,6 +240,8 @@ struct gpu_ops { | |||
240 | bool *post_event); | 240 | bool *post_event); |
241 | void (*create_gr_sysfs)(struct device *dev); | 241 | void (*create_gr_sysfs)(struct device *dev); |
242 | u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); | 242 | u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); |
243 | int (*record_sm_error_state)(struct gk20a *g, | ||
244 | u32 gpc, u32 tpc); | ||
243 | } gr; | 245 | } gr; |
244 | const char *name; | 246 | const char *name; |
245 | struct { | 247 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 734552a1..c0a25e68 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include "semaphore_gk20a.h" | 58 | #include "semaphore_gk20a.h" |
59 | #include "platform_gk20a.h" | 59 | #include "platform_gk20a.h" |
60 | #include "ctxsw_trace_gk20a.h" | 60 | #include "ctxsw_trace_gk20a.h" |
61 | #include "hw_proj_gk20a.h" | ||
61 | 62 | ||
62 | #define BLK_SIZE (256) | 63 | #define BLK_SIZE (256) |
63 | #define NV_PMM_FBP_STRIDE 0x1000 | 64 | #define NV_PMM_FBP_STRIDE 0x1000 |
@@ -3129,6 +3130,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3129 | 3130 | ||
3130 | memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); | 3131 | memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); |
3131 | 3132 | ||
3133 | kfree(gr->sm_error_states); | ||
3132 | kfree(gr->gpc_tpc_count); | 3134 | kfree(gr->gpc_tpc_count); |
3133 | kfree(gr->gpc_zcb_count); | 3135 | kfree(gr->gpc_zcb_count); |
3134 | kfree(gr->gpc_ppc_count); | 3136 | kfree(gr->gpc_ppc_count); |
@@ -4426,6 +4428,19 @@ restore_fe_go_idle: | |||
4426 | if (err) | 4428 | if (err) |
4427 | goto out; | 4429 | goto out; |
4428 | 4430 | ||
4431 | kfree(gr->sm_error_states); | ||
4432 | |||
4433 | /* we need to allocate this after g->ops.gr.init_fs_state() since | ||
4434 | * we initialize gr->no_of_sm in this function | ||
4435 | */ | ||
4436 | gr->sm_error_states = kzalloc( | ||
4437 | sizeof(struct nvgpu_dbg_gpu_sm_error_state_record) | ||
4438 | * gr->no_of_sm, GFP_KERNEL); | ||
4439 | if (!gr->sm_error_states) { | ||
4440 | err = -ENOMEM; | ||
4441 | goto restore_fe_go_idle; | ||
4442 | } | ||
4443 | |||
4429 | out: | 4444 | out: |
4430 | gk20a_dbg_fn("done"); | 4445 | gk20a_dbg_fn("done"); |
4431 | return 0; | 4446 | return 0; |
@@ -5494,6 +5509,32 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr) | |||
5494 | return hww_warp_esr; | 5509 | return hww_warp_esr; |
5495 | } | 5510 | } |
5496 | 5511 | ||
5512 | static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
5513 | { | ||
5514 | int sm_id; | ||
5515 | struct gr_gk20a *gr = &g->gr; | ||
5516 | u32 offset = proj_gpc_stride_v() * gpc + | ||
5517 | proj_tpc_in_gpc_stride_v() * tpc; | ||
5518 | |||
5519 | mutex_lock(&g->dbg_sessions_lock); | ||
5520 | |||
5521 | sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, | ||
5522 | gr_gpc0_tpc0_sm_cfg_r() + offset)); | ||
5523 | |||
5524 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
5525 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
5526 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
5527 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | ||
5528 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
5529 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset); | ||
5530 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
5531 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); | ||
5532 | |||
5533 | mutex_unlock(&g->dbg_sessions_lock); | ||
5534 | |||
5535 | return 0; | ||
5536 | } | ||
5537 | |||
5497 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | 5538 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, |
5498 | bool *post_event, struct channel_gk20a *fault_ch) | 5539 | bool *post_event, struct channel_gk20a *fault_ch) |
5499 | { | 5540 | { |
@@ -5554,6 +5595,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5554 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5595 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5555 | "sm hww global %08x warp %08x", global_esr, warp_esr); | 5596 | "sm hww global %08x warp %08x", global_esr, warp_esr); |
5556 | 5597 | ||
5598 | gr_gk20a_elpg_protected_call(g, | ||
5599 | g->ops.gr.record_sm_error_state(g, gpc, tpc)); | ||
5600 | |||
5557 | if (g->ops.gr.pre_process_sm_exception) { | 5601 | if (g->ops.gr.pre_process_sm_exception) { |
5558 | ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, | 5602 | ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, |
5559 | global_esr, warp_esr, | 5603 | global_esr, warp_esr, |
@@ -8370,4 +8414,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8370 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; | 8414 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; |
8371 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; | 8415 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; |
8372 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; | 8416 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; |
8417 | gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state; | ||
8373 | } | 8418 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index c82cf75c..22ff1351 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -329,6 +329,7 @@ struct gr_gk20a { | |||
329 | u32 fbp_en_mask; | 329 | u32 fbp_en_mask; |
330 | u32 no_of_sm; | 330 | u32 no_of_sm; |
331 | struct sm_info *sm_to_cluster; | 331 | struct sm_info *sm_to_cluster; |
332 | struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states; | ||
332 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 333 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
333 | struct mutex cs_lock; | 334 | struct mutex cs_lock; |
334 | struct gk20a_cs_snapshot *cs_data; | 335 | struct gk20a_cs_snapshot *cs_data; |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h index 48aa1524..ab2a975b 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | |||
@@ -2122,6 +2122,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v) | |||
2122 | { | 2122 | { |
2123 | return (v & 0xffff) << 0; | 2123 | return (v & 0xffff) << 0; |
2124 | } | 2124 | } |
2125 | static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r) | ||
2126 | { | ||
2127 | return (r >> 0) & 0xffff; | ||
2128 | } | ||
2125 | static inline u32 gr_gpc0_tpc0_sm_arch_r(void) | 2129 | static inline u32 gr_gpc0_tpc0_sm_arch_r(void) |
2126 | { | 2130 | { |
2127 | return 0x0050469c; | 2131 | return 0x0050469c; |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index b49f2301..eeb70d76 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "hw_fuse_gm20b.h" | 31 | #include "hw_fuse_gm20b.h" |
32 | #include "pmu_gm20b.h" | 32 | #include "pmu_gm20b.h" |
33 | #include "acr_gm20b.h" | 33 | #include "acr_gm20b.h" |
34 | #include "hw_proj_gm20b.h" | ||
34 | 35 | ||
35 | static void gr_gm20b_init_gpc_mmu(struct gk20a *g) | 36 | static void gr_gm20b_init_gpc_mmu(struct gk20a *g) |
36 | { | 37 | { |
@@ -1190,6 +1191,34 @@ static void gr_gm20b_get_access_map(struct gk20a *g, | |||
1190 | *num_entries = ARRAY_SIZE(wl_addr_gm20b); | 1191 | *num_entries = ARRAY_SIZE(wl_addr_gm20b); |
1191 | } | 1192 | } |
1192 | 1193 | ||
1194 | static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
1195 | { | ||
1196 | int sm_id; | ||
1197 | struct gr_gk20a *gr = &g->gr; | ||
1198 | u32 offset = proj_gpc_stride_v() * gpc + | ||
1199 | proj_tpc_in_gpc_stride_v() * tpc; | ||
1200 | |||
1201 | mutex_lock(&g->dbg_sessions_lock); | ||
1202 | |||
1203 | sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, | ||
1204 | gr_gpc0_tpc0_sm_cfg_r() + offset)); | ||
1205 | |||
1206 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
1207 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
1208 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
1209 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | ||
1210 | gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g, | ||
1211 | gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset); | ||
1212 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
1213 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset); | ||
1214 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
1215 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); | ||
1216 | |||
1217 | mutex_unlock(&g->dbg_sessions_lock); | ||
1218 | |||
1219 | return 0; | ||
1220 | } | ||
1221 | |||
1193 | void gm20b_init_gr(struct gpu_ops *gops) | 1222 | void gm20b_init_gr(struct gpu_ops *gops) |
1194 | { | 1223 | { |
1195 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1224 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1256,4 +1285,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1256 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; | 1285 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; |
1257 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; | 1286 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; |
1258 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; | 1287 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; |
1288 | gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state; | ||
1259 | } | 1289 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h index dbe54860..b796e2d3 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | |||
@@ -2130,6 +2130,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v) | |||
2130 | { | 2130 | { |
2131 | return (v & 0xffff) << 0; | 2131 | return (v & 0xffff) << 0; |
2132 | } | 2132 | } |
2133 | static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r) | ||
2134 | { | ||
2135 | return (r >> 0) & 0xffff; | ||
2136 | } | ||
2133 | static inline u32 gr_gpc0_tpc0_sm_arch_r(void) | 2137 | static inline u32 gr_gpc0_tpc0_sm_arch_r(void) |
2134 | { | 2138 | { |
2135 | return 0x0050469c; | 2139 | return 0x0050469c; |
@@ -3270,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void) | |||
3270 | { | 3274 | { |
3271 | return 0x0; | 3275 | return 0x0; |
3272 | } | 3276 | } |
3277 | static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void) | ||
3278 | { | ||
3279 | return 0x00504654; | ||
3280 | } | ||
3273 | static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) | 3281 | static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) |
3274 | { | 3282 | { |
3275 | return 0x00504770; | 3283 | return 0x00504770; |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 16d60261..96619015 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args { | |||
676 | #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \ | 676 | #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \ |
677 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args) | 677 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args) |
678 | 678 | ||
679 | |||
680 | struct nvgpu_dbg_gpu_sm_error_state_record { | ||
681 | __u32 hww_global_esr; | ||
682 | __u32 hww_warp_esr; | ||
683 | __u64 hww_warp_esr_pc; | ||
684 | __u32 hww_global_esr_report_mask; | ||
685 | __u32 hww_warp_esr_report_mask; | ||
686 | |||
687 | /* | ||
688 | * Notes | ||
689 | * - This struct can be safely appended with new fields. However, always | ||
690 | * keep the structure size multiple of 8 and make sure that the binary | ||
691 | * layout does not change between 32-bit and 64-bit architectures. | ||
692 | */ | ||
693 | }; | ||
694 | |||
695 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args { | ||
696 | __u32 sm_id; | ||
697 | __u32 padding; | ||
698 | __u64 sm_error_state_record_mem; | ||
699 | __u64 sm_error_state_record_size; | ||
700 | }; | ||
701 | |||
702 | #define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \ | ||
703 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args) | ||
704 | |||
705 | |||
679 | #define NVGPU_DBG_GPU_IOCTL_LAST \ | 706 | #define NVGPU_DBG_GPU_IOCTL_LAST \ |
680 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE) | 707 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE) |
681 | 708 | ||
682 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ | 709 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ |
683 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) | 710 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) |