summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorDeepak Goyal <dgoyal@nvidia.com>2017-11-15 01:10:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-12-14 12:03:45 -0500
commit49be5d49292c9c853f5b6ad53c32d59f866322ec (patch)
tree8e34c42aff1cad6ea0fe4e2d9885dcd9043ef1ab /drivers/gpu/nvgpu/gv11b
parent1bf9b91c05ceebf872171a536c2660ee69fa5f64 (diff)
gpu: nvgpu: gv11b: implement ecc scrubber
Check the availability of ECC units by checking the relevant ECC fuse and fuse overrides. During GPU boot, initialize the available ECC units by scrubbing each of them individually. ECC initialization should be done before gr initialization. The following ECC units are scrubbed: SM LRF, SM L1 DATA, SM L1 TAG, SM CBU, SM ICACHE. Bug 200339497 Change-Id: I54bf8cc1fce639a9993bf80984dafc28dca0dba3 Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1612734 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c185
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.h1
-rw-r--r--drivers/gpu/nvgpu/gv11b/gv11b.c120
-rw-r--r--drivers/gpu/nvgpu/gv11b/gv11b.h1
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c1
5 files changed, 308 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 033d83d5..8514cc1e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -44,6 +44,7 @@
44#include "gv11b/gr_gv11b.h" 44#include "gv11b/gr_gv11b.h"
45#include "gv11b/mm_gv11b.h" 45#include "gv11b/mm_gv11b.h"
46#include "gv11b/subctx_gv11b.h" 46#include "gv11b/subctx_gv11b.h"
47#include "gv11b/gv11b.h"
47 48
48#include <nvgpu/hw/gv11b/hw_gr_gv11b.h> 49#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
49#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h> 50#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
@@ -57,6 +58,10 @@
57 58
58#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 1000 59#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 1000
59 60
61/* ECC scrubbing will be done in 1 pri read cycle, but for safety up to ECC_SCRUBBING_TIMEOUT_MAX / ECC_SCRUBBING_TIMEOUT_DEFAULT (100) retries are used */
62#define ECC_SCRUBBING_TIMEOUT_MAX 1000
63#define ECC_SCRUBBING_TIMEOUT_DEFAULT 10
64
60bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) 65bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
61{ 66{
62 bool valid = false; 67 bool valid = false;
@@ -3674,3 +3679,183 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g)
3674 /* 100 msec in usec count */ 3679 /* 100 msec in usec count */
3675 return (100 * 1000UL); 3680 return (100 * 1000UL);
3676} 3681}
3682
3683static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
3684 u32 scrub_reg, u32 scrub_mask, u32 scrub_done)
3685{
3686 struct nvgpu_timeout timeout;
3687 int status = 0;
3688 u32 val;
3689
3690 nvgpu_timeout_init(g, &timeout,
3691 ECC_SCRUBBING_TIMEOUT_MAX /
3692 ECC_SCRUBBING_TIMEOUT_DEFAULT,
3693 NVGPU_TIMER_RETRY_TIMER);
3694 do {
3695 val = gk20a_readl(g, scrub_reg);
3696 if ((val & scrub_mask) == scrub_done)
3697 goto exit;
3698 nvgpu_udelay(ECC_SCRUBBING_TIMEOUT_DEFAULT);
3699 } while (!nvgpu_timeout_expired(&timeout));
3700
3701 if (nvgpu_timeout_peek_expired(&timeout))
3702 status = -ETIMEDOUT;
3703exit:
3704 return status;
3705
3706}
3707
3708static int gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g)
3709{
3710 u32 scrub_mask, scrub_done;
3711
3712 if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_LRF)) {
3713 nvgpu_log_info(g, "ECC SM LRF is disabled");
3714 return 0;
3715 }
3716
3717 nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_lrf");
3718 scrub_mask =
3719 (gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp0_task_f() |
3720 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp1_task_f() |
3721 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp2_task_f() |
3722 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp3_task_f() |
3723 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp4_task_f() |
3724 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp5_task_f() |
3725 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp6_task_f() |
3726 gr_pri_gpcs_tpcs_sm_lrf_ecc_control_scrub_qrfdp7_task_f());
3727
3728 /* Issue scrub lrf regions with single write command */
3729 gk20a_writel(g, gr_pri_gpcs_tpcs_sm_lrf_ecc_control_r(), scrub_mask);
3730
3731 scrub_done =
3732 (gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp0_init_f() |
3733 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp1_init_f() |
3734 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp2_init_f() |
3735 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp3_init_f() |
3736 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp4_init_f() |
3737 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp5_init_f() |
3738 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp6_init_f() |
3739 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp7_init_f());
3740
3741 return gr_gv11b_ecc_scrub_is_done(g,
3742 gr_pri_gpc0_tpc0_sm_lrf_ecc_control_r(),
3743 scrub_mask, scrub_done);
3744}
3745
3746static int gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g)
3747{
3748 u32 scrub_mask, scrub_done;
3749
3750 if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA)) {
3751 nvgpu_log_info(g, "ECC L1DATA is disabled");
3752 return 0;
3753 }
3754 nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_data");
3755 scrub_mask =
3756 (gr_pri_gpcs_tpcs_sm_l1_data_ecc_control_scrub_el1_0_task_f() |
3757 gr_pri_gpcs_tpcs_sm_l1_data_ecc_control_scrub_el1_1_task_f());
3758
3759 gk20a_writel(g, gr_pri_gpcs_tpcs_sm_l1_data_ecc_control_r(),
3760 scrub_mask);
3761
3762 scrub_done =
3763 (gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_0_init_f() |
3764 gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_1_init_f());
3765 return gr_gv11b_ecc_scrub_is_done(g,
3766 gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_r(),
3767 scrub_mask, scrub_done);
3768}
3769
3770static int gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g)
3771{
3772 u32 scrub_mask, scrub_done;
3773
3774 if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG)) {
3775 nvgpu_log_info(g, "ECC L1TAG is disabled");
3776 return 0;
3777 }
3778 nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_tag");
3779 scrub_mask =
3780 (gr_pri_gpcs_tpcs_sm_l1_tag_ecc_control_scrub_el1_0_task_f() |
3781 gr_pri_gpcs_tpcs_sm_l1_tag_ecc_control_scrub_el1_1_task_f());
3782 gk20a_writel(g, gr_pri_gpcs_tpcs_sm_l1_tag_ecc_control_r(), scrub_mask);
3783
3784 scrub_done =
3785 (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_0_init_f() |
3786 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_1_init_f());
3787 return gr_gv11b_ecc_scrub_is_done(g,
3788 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_r(),
3789 scrub_mask, scrub_done);
3790}
3791
3792static int gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g)
3793{
3794 u32 scrub_mask, scrub_done;
3795
3796 if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_CBU)) {
3797 nvgpu_log_info(g, "ECC CBU is disabled");
3798 return 0;
3799 }
3800 nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_cbu");
3801 scrub_mask =
3802 (gr_pri_gpcs_tpcs_sm_cbu_ecc_control_scrub_warp_sm0_task_f() |
3803 gr_pri_gpcs_tpcs_sm_cbu_ecc_control_scrub_warp_sm1_task_f() |
3804 gr_pri_gpcs_tpcs_sm_cbu_ecc_control_scrub_barrier_sm0_task_f() |
3805 gr_pri_gpcs_tpcs_sm_cbu_ecc_control_scrub_barrier_sm1_task_f());
3806 gk20a_writel(g, gr_pri_gpcs_tpcs_sm_cbu_ecc_control_r(), scrub_mask);
3807
3808 scrub_done =
3809 (gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm0_init_f() |
3810 gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm1_init_f() |
3811 gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm0_init_f() |
3812 gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm1_init_f());
3813 return gr_gv11b_ecc_scrub_is_done(g,
3814 gr_pri_gpc0_tpc0_sm_cbu_ecc_control_r(),
3815 scrub_mask, scrub_done);
3816}
3817
3818static int gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g)
3819{
3820 u32 scrub_mask, scrub_done;
3821
3822 if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE)) {
3823 nvgpu_log_info(g, "ECC ICAHE is disabled");
3824 return 0;
3825 }
3826 nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_icahe");
3827 scrub_mask =
3828 (gr_pri_gpcs_tpcs_sm_icache_ecc_control_scrub_l0_data_task_f() |
3829 gr_pri_gpcs_tpcs_sm_icache_ecc_control_scrub_l0_predecode_task_f() |
3830 gr_pri_gpcs_tpcs_sm_icache_ecc_control_scrub_l1_data_task_f() |
3831 gr_pri_gpcs_tpcs_sm_icache_ecc_control_scrub_l1_predecode_task_f());
3832 gk20a_writel(g, gr_pri_gpcs_tpcs_sm_icache_ecc_control_r(), scrub_mask);
3833
3834 scrub_done =
3835 (gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_data_init_f() |
3836 gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_predecode_init_f() |
3837 gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_data_init_f() |
3838 gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_predecode_init_f());
3839 return gr_gv11b_ecc_scrub_is_done(g,
3840 gr_pri_gpc0_tpc0_sm_icache_ecc_control_r(),
3841 scrub_mask, scrub_done);
3842}
3843
3844void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g)
3845{
3846 nvgpu_log_fn(g, "ecc srub start ");
3847
3848 gv11b_detect_ecc_enabled_units(g);
3849
3850 if (gr_gv11b_ecc_scrub_sm_lrf(g))
3851 nvgpu_warn(g, "ECC SCRUB SM LRF Failed");
3852 if (gr_gv11b_ecc_scrub_sm_l1_data(g))
3853 nvgpu_warn(g, "ECC SCRUB SM L1 DATA Failed");
3854 if (gr_gv11b_ecc_scrub_sm_l1_tag(g))
3855 nvgpu_warn(g, "ECC SCRUB SM L1 TAG Failed");
3856 if (gr_gv11b_ecc_scrub_sm_cbu(g))
3857 nvgpu_warn(g, "ECC SCRUB SM CBU Failed");
3858 if (gr_gv11b_ecc_scrub_sm_icahe(g))
3859 nvgpu_warn(g, "ECC SCRUB SM ICACHE Failed");
3860
3861}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 7c56f62d..39d12b3f 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -216,5 +216,6 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g);
216int gr_gv11b_init_preemption_state(struct gk20a *g); 216int gr_gv11b_init_preemption_state(struct gk20a *g);
217void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g); 217void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g);
218unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); 218unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
219void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
219 220
220#endif 221#endif
diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c
index 211755e5..a62e49fb 100644
--- a/drivers/gpu/nvgpu/gv11b/gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gv11b.c
@@ -26,8 +26,128 @@
26#include <nvgpu/enabled_t19x.h> 26#include <nvgpu/enabled_t19x.h>
27 27
28#include "gk20a/gk20a.h" 28#include "gk20a/gk20a.h"
29#include "gp10b/gp10b.h"
29 30
30#include "gv11b/gv11b.h" 31#include "gv11b/gv11b.h"
32#include <nvgpu/hw/gv11b/hw_fuse_gv11b.h>
33#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
34
35void gv11b_detect_ecc_enabled_units(struct gk20a *g)
36{
37 u32 opt_ecc_en = gk20a_readl(g, fuse_opt_ecc_en_r());
38 u32 opt_feature_fuses_override_disable =
39 gk20a_readl(g,
40 fuse_opt_feature_fuses_override_disable_r());
41 u32 fecs_feature_override_ecc =
42 gk20a_readl(g,
43 gr_fecs_feature_override_ecc_r());
44
45 if (opt_feature_fuses_override_disable) {
46 if (opt_ecc_en) {
47 __nvgpu_set_enabled(g,
48 NVGPU_ECC_ENABLED_SM_LRF, true);
49 __nvgpu_set_enabled(g,
50 NVGPU_ECC_ENABLED_SM_L1_DATA, true);
51 __nvgpu_set_enabled(g,
52 NVGPU_ECC_ENABLED_SM_L1_TAG, true);
53 __nvgpu_set_enabled(g,
54 NVGPU_ECC_ENABLED_SM_ICACHE, true);
55 __nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_LTC, true);
56 __nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_CBU, true);
57 }
58 } else {
59 /* SM LRF */
60 if (gr_fecs_feature_override_ecc_sm_lrf_override_v(
61 fecs_feature_override_ecc)) {
62 if (gr_fecs_feature_override_ecc_sm_lrf_v(
63 fecs_feature_override_ecc)) {
64 __nvgpu_set_enabled(g,
65 NVGPU_ECC_ENABLED_SM_LRF, true);
66 }
67 } else {
68 if (opt_ecc_en) {
69 __nvgpu_set_enabled(g,
70 NVGPU_ECC_ENABLED_SM_LRF, true);
71 }
72 }
73 /* SM L1 DATA*/
74 if (gr_fecs_feature_override_ecc_sm_l1_data_override_v(
75 fecs_feature_override_ecc)) {
76 if (gr_fecs_feature_override_ecc_sm_l1_data_v(
77 fecs_feature_override_ecc)) {
78 __nvgpu_set_enabled(g,
79 NVGPU_ECC_ENABLED_SM_L1_DATA, true);
80 }
81 } else {
82 if (opt_ecc_en) {
83 __nvgpu_set_enabled(g,
84 NVGPU_ECC_ENABLED_SM_L1_DATA, true);
85 }
86 }
87 /* SM L1 TAG*/
88 if (gr_fecs_feature_override_ecc_sm_l1_tag_override_v(
89 fecs_feature_override_ecc)) {
90 if (gr_fecs_feature_override_ecc_sm_l1_tag_v(
91 fecs_feature_override_ecc)) {
92 __nvgpu_set_enabled(g,
93 NVGPU_ECC_ENABLED_SM_L1_TAG, true);
94 }
95 } else {
96 if (opt_ecc_en) {
97 __nvgpu_set_enabled(g,
98 NVGPU_ECC_ENABLED_SM_L1_TAG, true);
99 }
100 }
101 /* SM ICACHE*/
102 if (gr_fecs_feature_override_ecc_1_sm_l0_icache_override_v(
103 fecs_feature_override_ecc) &&
104 gr_fecs_feature_override_ecc_1_sm_l1_icache_override_v(
105 fecs_feature_override_ecc)) {
106 if (gr_fecs_feature_override_ecc_1_sm_l0_icache_v(
107 fecs_feature_override_ecc) &&
108 gr_fecs_feature_override_ecc_1_sm_l1_icache_v(
109 fecs_feature_override_ecc)) {
110 __nvgpu_set_enabled(g,
111 NVGPU_ECC_ENABLED_SM_ICACHE, true);
112 }
113 } else {
114 if (opt_ecc_en) {
115 __nvgpu_set_enabled(g,
116 NVGPU_ECC_ENABLED_SM_ICACHE, true);
117 }
118 }
119 /* LTC */
120 if (gr_fecs_feature_override_ecc_ltc_override_v(
121 fecs_feature_override_ecc)) {
122 if (gr_fecs_feature_override_ecc_ltc_v(
123 fecs_feature_override_ecc)) {
124 __nvgpu_set_enabled(g,
125 NVGPU_ECC_ENABLED_LTC, true);
126 }
127 } else {
128 if (opt_ecc_en) {
129 __nvgpu_set_enabled(g,
130 NVGPU_ECC_ENABLED_LTC, true);
131 }
132 }
133 /* SM CBU */
134 if (gr_fecs_feature_override_ecc_sm_cbu_override_v(
135 fecs_feature_override_ecc)) {
136 if (gr_fecs_feature_override_ecc_sm_cbu_v(
137 fecs_feature_override_ecc)) {
138 __nvgpu_set_enabled(g,
139 NVGPU_ECC_ENABLED_SM_CBU, true);
140 }
141 } else {
142 if (opt_ecc_en) {
143 __nvgpu_set_enabled(g,
144 NVGPU_ECC_ENABLED_SM_CBU, true);
145 }
146 }
147 }
148}
149
150
31 151
32int gv11b_init_gpu_characteristics(struct gk20a *g) 152int gv11b_init_gpu_characteristics(struct gk20a *g)
33{ 153{
diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.h b/drivers/gpu/nvgpu/gv11b/gv11b.h
index 3d5490e6..17dfa7aa 100644
--- a/drivers/gpu/nvgpu/gv11b/gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gv11b.h
@@ -27,6 +27,7 @@
27 27
28#include "gk20a/gk20a.h" 28#include "gk20a/gk20a.h"
29 29
30void gv11b_detect_ecc_enabled_units(struct gk20a *g);
30int gv11b_init_gpu_characteristics(struct gk20a *g); 31int gv11b_init_gpu_characteristics(struct gk20a *g);
31 32
32#endif /* GV11B_H */ 33#endif /* GV11B_H */
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f6bdf6e5..65cae8de 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -392,6 +392,7 @@ static const struct gpu_ops gv11b_ops = {
392 gr_gv11b_init_gfxp_wfi_timeout_count, 392 gr_gv11b_init_gfxp_wfi_timeout_count,
393 .get_max_gfxp_wfi_timeout_count = 393 .get_max_gfxp_wfi_timeout_count =
394 gr_gv11b_get_max_gfxp_wfi_timeout_count, 394 gr_gv11b_get_max_gfxp_wfi_timeout_count,
395 .ecc_init_scrub_reg = gr_gv11b_ecc_init_scrub_reg,
395 }, 396 },
396 .fb = { 397 .fb = {
397 .reset = gv11b_fb_reset, 398 .reset = gv11b_fb_reset,