summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 43
1 files changed, 42 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index a7a804d2..110819a9 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GV11b GPU GR 2 * GV11b GPU GR
3 * 3 *
4 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -37,6 +37,7 @@
37#include <nvgpu/bitops.h> 37#include <nvgpu/bitops.h>
38#include <nvgpu/gk20a.h> 38#include <nvgpu/gk20a.h>
39#include <nvgpu/channel.h> 39#include <nvgpu/channel.h>
40#include <nvgpu/nvgpu_err.h>
40 41
41#include "gk20a/gr_gk20a.h" 42#include "gk20a/gr_gk20a.h"
42#include "gk20a/dbg_gpu_gk20a.h" 43#include "gk20a/dbg_gpu_gk20a.h"
@@ -61,6 +62,8 @@
61#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> 62#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
62#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> 63#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
63 64
65#define SHIFT_8_BITS 8U
66
64#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 67#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
65 68
66/* ecc scrubbing will done in 1 pri read cycle,but for safety used 10 retries */ 69/* ecc scrubbing will done in 1 pri read cycle,but for safety used 10 retries */
@@ -224,6 +227,12 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
224 } 227 }
225 g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += 228 g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
226 l1_tag_corrected_err_count_delta; 229 l1_tag_corrected_err_count_delta;
230
231 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
232 (gpc << SHIFT_8_BITS) | tpc,
233 GPU_SM_L1_TAG_ECC_CORRECTED, 0,
234 g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
235
227 gk20a_writel(g, 236 gk20a_writel(g,
228 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, 237 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
229 0); 238 0);
@@ -240,6 +249,12 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
240 } 249 }
241 g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += 250 g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
242 l1_tag_uncorrected_err_count_delta; 251 l1_tag_uncorrected_err_count_delta;
252
253 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
254 (gpc << SHIFT_8_BITS) | tpc,
255 GPU_SM_L1_TAG_ECC_UNCORRECTED, 0,
256 g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
257
243 gk20a_writel(g, 258 gk20a_writel(g,
244 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, 259 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
245 0); 260 0);
@@ -335,6 +350,10 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
335 } 350 }
336 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += 351 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
337 lrf_uncorrected_err_count_delta; 352 lrf_uncorrected_err_count_delta;
353 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
354 (gpc << SHIFT_8_BITS) | tpc,
355 GPU_SM_LRF_ECC_UNCORRECTED, 0,
356 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter);
338 gk20a_writel(g, 357 gk20a_writel(g,
339 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, 358 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
340 0); 359 0);
@@ -497,6 +516,12 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
497 } 516 }
498 g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += 517 g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
499 cbu_uncorrected_err_count_delta; 518 cbu_uncorrected_err_count_delta;
519
520 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
521 (gpc << SHIFT_8_BITS) | tpc,
522 GPU_SM_CBU_ECC_UNCORRECTED,
523 0, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter);
524
500 gk20a_writel(g, 525 gk20a_writel(g,
501 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, 526 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
502 0); 527 0);
@@ -580,6 +605,10 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
580 } 605 }
581 g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += 606 g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
582 l1_data_uncorrected_err_count_delta; 607 l1_data_uncorrected_err_count_delta;
608 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
609 (gpc << SHIFT_8_BITS) | tpc,
610 GPU_SM_L1_DATA_ECC_UNCORRECTED,
611 0, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter);
583 gk20a_writel(g, 612 gk20a_writel(g,
584 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, 613 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
585 0); 614 0);
@@ -2537,10 +2566,18 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2537 2566
2538 if (ecc_status & 2567 if (ecc_status &
2539 gr_fecs_falcon_ecc_status_corrected_err_imem_m()) { 2568 gr_fecs_falcon_ecc_status_corrected_err_imem_m()) {
2569 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0,
2570 GPU_FECS_FALCON_IMEM_ECC_CORRECTED,
2571 ecc_addr,
2572 g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
2540 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); 2573 nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
2541 } 2574 }
2542 if (ecc_status & 2575 if (ecc_status &
2543 gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) { 2576 gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) {
2577 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0,
2578 GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED,
2579 ecc_addr,
2580 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
2544 nvgpu_log(g, gpu_dbg_intr, 2581 nvgpu_log(g, gpu_dbg_intr,
2545 "imem ecc error uncorrected"); 2582 "imem ecc error uncorrected");
2546 } 2583 }
@@ -2550,6 +2587,10 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2550 } 2587 }
2551 if (ecc_status & 2588 if (ecc_status &
2552 gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) { 2589 gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) {
2590 nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0,
2591 GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED,
2592 ecc_addr,
2593 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
2553 nvgpu_log(g, gpu_dbg_intr, 2594 nvgpu_log(g, gpu_dbg_intr,
2554 "dmem ecc error uncorrected"); 2595 "dmem ecc error uncorrected");
2555 } 2596 }