diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-03-09 04:21:43 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-19 11:07:03 -0400 |
commit | 04e45bc943e9703c26f229dfbe558d94418acbe1 (patch) | |
tree | 541c62a32055255e82cc953b79b50c8925903f12 /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |
parent | 9cf7e23f57d8669d99886a3c82d4997b94c35df8 (diff) |
gpu: nvgpu: support storing/reading single SM error state
Add support for storing the error state of a single SM before
preprocessing the SM exception.
The error state is stored as:
struct nvgpu_dbg_gpu_sm_error_state_record {
u32 hww_global_esr;
u32 hww_warp_esr;
u64 hww_warp_esr_pc;
u32 hww_global_esr_report_mask;
u32 hww_warp_esr_report_mask;
}
Note that we can safely append new fields to the above
structure in the future if required.
Also, add the IOCTL NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE
to support reading an SM's error state from user space.
Bug 200156699
Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120329
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 95957788..d9c96417 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( | |||
525 | return 0; | 525 | return 0; |
526 | } | 526 | } |
527 | 527 | ||
528 | static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( | ||
529 | struct dbg_session_gk20a *dbg_s, | ||
530 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) | ||
531 | { | ||
532 | struct gk20a *g = get_gk20a(dbg_s->dev); | ||
533 | struct gr_gk20a *gr = &g->gr; | ||
534 | struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state; | ||
535 | u32 sm_id; | ||
536 | int err = 0; | ||
537 | |||
538 | sm_id = args->sm_id; | ||
539 | if (sm_id >= gr->no_of_sm) | ||
540 | return -EINVAL; | ||
541 | |||
542 | sm_error_state = gr->sm_error_states + sm_id; | ||
543 | |||
544 | if (args->sm_error_state_record_size > 0) { | ||
545 | size_t write_size = sizeof(*sm_error_state); | ||
546 | |||
547 | if (write_size > args->sm_error_state_record_size) | ||
548 | write_size = args->sm_error_state_record_size; | ||
549 | |||
550 | mutex_lock(&g->dbg_sessions_lock); | ||
551 | err = copy_to_user((void __user *)(uintptr_t) | ||
552 | args->sm_error_state_record_mem, | ||
553 | sm_error_state, | ||
554 | write_size); | ||
555 | mutex_unlock(&g->dbg_sessions_lock); | ||
556 | if (err) { | ||
557 | gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n"); | ||
558 | return err; | ||
559 | } | ||
560 | |||
561 | args->sm_error_state_record_size = write_size; | ||
562 | } | ||
563 | |||
564 | return 0; | ||
565 | } | ||
566 | |||
528 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | 567 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, |
529 | unsigned long arg) | 568 | unsigned long arg) |
530 | { | 569 | { |
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
622 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | 661 | (struct nvgpu_dbg_gpu_timeout_args *)buf); |
623 | break; | 662 | break; |
624 | 663 | ||
664 | case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
665 | err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, | ||
666 | (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); | ||
667 | break; | ||
668 | |||
625 | default: | 669 | default: |
626 | gk20a_err(dev_from_gk20a(g), | 670 | gk20a_err(dev_from_gk20a(g), |
627 | "unrecognized dbg gpu ioctl cmd: 0x%x", | 671 | "unrecognized dbg gpu ioctl cmd: 0x%x", |