diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 43 |
1 file changed, 42 insertions, 1 deletion
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a7a804d2..110819a9 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GV11b GPU GR | 2 | * GV11b GPU GR |
3 | * | 3 | * |
4 | * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a | 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), | 7 | * copy of this software and associated documentation files (the "Software"), |
@@ -37,6 +37,7 @@ | |||
37 | #include <nvgpu/bitops.h> | 37 | #include <nvgpu/bitops.h> |
38 | #include <nvgpu/gk20a.h> | 38 | #include <nvgpu/gk20a.h> |
39 | #include <nvgpu/channel.h> | 39 | #include <nvgpu/channel.h> |
40 | #include <nvgpu/nvgpu_err.h> | ||
40 | 41 | ||
41 | #include "gk20a/gr_gk20a.h" | 42 | #include "gk20a/gr_gk20a.h" |
42 | #include "gk20a/dbg_gpu_gk20a.h" | 43 | #include "gk20a/dbg_gpu_gk20a.h" |
@@ -61,6 +62,8 @@ | |||
61 | #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> | 62 | #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> |
62 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | 63 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> |
63 | 64 | ||
65 | #define SHIFT_8_BITS 8U | ||
66 | |||
64 | #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 | 67 | #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 |
65 | 68 | ||
66 | /* ecc scrubbing will be done in 1 pri read cycle, but for safety 10 retries are used */ | 69 | /* ecc scrubbing will be done in 1 pri read cycle, but for safety 10 retries are used */ |
@@ -224,6 +227,12 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
224 | } | 227 | } |
225 | g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += | 228 | g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += |
226 | l1_tag_corrected_err_count_delta; | 229 | l1_tag_corrected_err_count_delta; |
230 | |||
231 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM, | ||
232 | (gpc << SHIFT_8_BITS) | tpc, | ||
233 | GPU_SM_L1_TAG_ECC_CORRECTED, 0, | ||
234 | g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); | ||
235 | |||
227 | gk20a_writel(g, | 236 | gk20a_writel(g, |
228 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, | 237 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, |
229 | 0); | 238 | 0); |
@@ -240,6 +249,12 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
240 | } | 249 | } |
241 | g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += | 250 | g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += |
242 | l1_tag_uncorrected_err_count_delta; | 251 | l1_tag_uncorrected_err_count_delta; |
252 | |||
253 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM, | ||
254 | (gpc << SHIFT_8_BITS) | tpc, | ||
255 | GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, | ||
256 | g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); | ||
257 | |||
243 | gk20a_writel(g, | 258 | gk20a_writel(g, |
244 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, | 259 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, |
245 | 0); | 260 | 0); |
@@ -335,6 +350,10 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
335 | } | 350 | } |
336 | g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += | 351 | g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += |
337 | lrf_uncorrected_err_count_delta; | 352 | lrf_uncorrected_err_count_delta; |
353 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM, | ||
354 | (gpc << SHIFT_8_BITS) | tpc, | ||
355 | GPU_SM_LRF_ECC_UNCORRECTED, 0, | ||
356 | g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter); | ||
338 | gk20a_writel(g, | 357 | gk20a_writel(g, |
339 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, | 358 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, |
340 | 0); | 359 | 0); |
@@ -497,6 +516,12 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
497 | } | 516 | } |
498 | g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += | 517 | g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += |
499 | cbu_uncorrected_err_count_delta; | 518 | cbu_uncorrected_err_count_delta; |
519 | |||
520 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM, | ||
521 | (gpc << SHIFT_8_BITS) | tpc, | ||
522 | GPU_SM_CBU_ECC_UNCORRECTED, | ||
523 | 0, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter); | ||
524 | |||
500 | gk20a_writel(g, | 525 | gk20a_writel(g, |
501 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, | 526 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, |
502 | 0); | 527 | 0); |
@@ -580,6 +605,10 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
580 | } | 605 | } |
581 | g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += | 606 | g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += |
582 | l1_data_uncorrected_err_count_delta; | 607 | l1_data_uncorrected_err_count_delta; |
608 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM, | ||
609 | (gpc << SHIFT_8_BITS) | tpc, | ||
610 | GPU_SM_L1_DATA_ECC_UNCORRECTED, | ||
611 | 0, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter); | ||
583 | gk20a_writel(g, | 612 | gk20a_writel(g, |
584 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, | 613 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, |
585 | 0); | 614 | 0); |
@@ -2537,10 +2566,18 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | |||
2537 | 2566 | ||
2538 | if (ecc_status & | 2567 | if (ecc_status & |
2539 | gr_fecs_falcon_ecc_status_corrected_err_imem_m()) { | 2568 | gr_fecs_falcon_ecc_status_corrected_err_imem_m()) { |
2569 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0, | ||
2570 | GPU_FECS_FALCON_IMEM_ECC_CORRECTED, | ||
2571 | ecc_addr, | ||
2572 | g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); | ||
2540 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); | 2573 | nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); |
2541 | } | 2574 | } |
2542 | if (ecc_status & | 2575 | if (ecc_status & |
2543 | gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) { | 2576 | gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) { |
2577 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0, | ||
2578 | GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, | ||
2579 | ecc_addr, | ||
2580 | g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); | ||
2544 | nvgpu_log(g, gpu_dbg_intr, | 2581 | nvgpu_log(g, gpu_dbg_intr, |
2545 | "imem ecc error uncorrected"); | 2582 | "imem ecc error uncorrected"); |
2546 | } | 2583 | } |
@@ -2550,6 +2587,10 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | |||
2550 | } | 2587 | } |
2551 | if (ecc_status & | 2588 | if (ecc_status & |
2552 | gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) { | 2589 | gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) { |
2590 | nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0, | ||
2591 | GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, | ||
2592 | ecc_addr, | ||
2593 | g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); | ||
2553 | nvgpu_log(g, gpu_dbg_intr, | 2594 | nvgpu_log(g, gpu_dbg_intr, |
2554 | "dmem ecc error uncorrected"); | 2595 | "dmem ecc error uncorrected"); |
2555 | } | 2596 | } |