aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorAlex Deucher <alexander.deucher@amd.com>2015-11-24 17:43:42 -0500
committerAlex Deucher <alexander.deucher@amd.com>2015-12-02 15:54:26 -0500
commitccba7691a580a0967f60a512473ce699b9edac0d (patch)
tree6a86aee8f3d2c4267cd9bb37549322d259c50b7b /drivers/gpu/drm/amd/amdgpu
parentaa5e24e5f8a83b19b1b19964f35562c7a42636e2 (diff)
drm/amdgpu: add EDC support for CZ (v3)
This adds EDC support for CZ. EDC = Error Correction and Detection. This code properly initializes the EDC hardware and resets the error counts. This is done in late_init since it requires the IB pool, which is not initialized during hw_init. v2: fix the IB size as noted by Felix, fix shader pgm register programming. v3: use the IB for the shaders as suggested by Christian. Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c331
1 file changed, 330 insertions, 1 deletion
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e1dcab98e249..07c1ec30259c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -964,6 +964,322 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
964 return 0; 964 return 0;
965} 965}
966 966
/* Raw GCN machine-code dwords for a compute shader used by the EDC GPR
 * workaround; per the commit description it initializes VGPR state.
 * Do not edit by hand — this is assembled shader binary, not C logic.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000, /* final pair ends the program (trailing 0xbf810000) */
};
1003
/* Raw GCN machine-code dwords for a compute shader used by the EDC GPR
 * workaround; per the commit description it initializes SGPR state.
 * This single shader image is dispatched for both the SGPR1 and SGPR2
 * passes below. Do not edit by hand — assembled shader binary.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000, /* program end plus a pad dword */
};
1028
/* Register/value pairs programmed via PACKET3_SET_SH_REG before the VGPR
 * init dispatch. Layout is { reg, value, reg, value, ... }; consumers
 * iterate two entries at a time (see gfx_v8_0_do_edc_gpr_workarounds).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, /* enable all CUs on SE0 (mask) */
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4, /* 1024 threads in X; Y and Z are 1 */
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20, /* shader resource word — meaning of 20 is hw-encoded; see GCN docs */
	/* USER_DATA values are recognizable 0xedcedcNN markers (EDC pattern) */
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1048
/* Register/value pairs for the first SGPR init dispatch.
 * Differs from vgpr_init_regs in the CU mask (0x0f — low four CUs),
 * RESOURCE_LIMITS, and thread count (256*5). Same {reg, value} layout.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, /* low CU mask; sgpr2 pass covers 0xf0 */
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5, /* 1280 threads in X */
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1068
/* Register/value pairs for the second SGPR init dispatch.
 * Identical to sgpr1_init_regs except the CU mask is 0xf0 (high four
 * CUs), so the two passes together cover mask 0xff.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, /* high CU mask; complements sgpr1's 0x0f */
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1088
/* SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers across the GFX blocks (CP, GDS, SPI, SQ, TC*, TD).
 * These are read back once at the end of the workaround — the read
 * clears the counters (see the final loop in
 * gfx_v8_0_do_edc_gpr_workarounds).
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1117
/**
 * gfx_v8_0_do_edc_gpr_workarounds - initialize EDC on Carrizo
 * @adev: amdgpu device pointer
 *
 * Builds a single indirect buffer (IB) that runs three compute
 * dispatches — one VGPR-init pass and two SGPR-init passes — then
 * enables EDC (DED_MODE/PROP_FED in GB_EDC_MODE, DIS_EDC cleared in
 * CC_GC_EDC_CONFIG) and reads the SEC/DED counter registers once to
 * clear them. No-op on non-Carrizo parts or if the compute ring is
 * not yet ready. Returns 0 on success or a negative error code.
 *
 * Must run after the IB pool exists, hence it is called from late_init.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable it while we program the IB */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* size the IB: per dispatch, each {reg,value} pair costs 3 dwords
	 * (SET_SH_REG header + offset + value), plus 4 dwords for the
	 * PGM_LO/HI packet, 5 for DISPATCH_DIRECT and 2 for EVENT_WRITE;
	 * times 4 to convert dwords to bytes.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	/* shader images live in the same IB, each at a 256-byte-aligned
	 * offset so the >> 8 shader base address below is exact */
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* copy the shader images into the tail of the IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI
	 * (the hardware takes the address in 256-byte units, hence >> 8) */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI
	 * (intentionally the same sgpr shader as the SGPR1 pass — only the
	 * CU mask in the register state differs) */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* NOTE(review): on the fail path GB_EDC_MODE is left at 0 and the
	 * saved value in tmp is never restored — confirm this is intended. */

	/* enable EDC: double-error-detect mode 2, propagate fatal errors */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* clear DIS_EDC and force bit 0 set (the "| 1") to enable EDC */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1282
967static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1283static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
968{ 1284{
969 u32 gb_addr_config; 1285 u32 gb_addr_config;
@@ -4458,6 +4774,19 @@ static int gfx_v8_0_early_init(void *handle)
4458 return 0; 4774 return 0;
4459} 4775}
4460 4776
/* Late-init hook for the GFX v8 IP block.
 *
 * Runs the EDC GPR workaround here rather than in hw_init because it
 * submits IBs, and the IB pool is only available by late init.
 * Returns 0 on success or the workaround's negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_do_edc_gpr_workarounds(adev);
}
4789
4461static int gfx_v8_0_set_powergating_state(void *handle, 4790static int gfx_v8_0_set_powergating_state(void *handle,
4462 enum amd_powergating_state state) 4791 enum amd_powergating_state state)
4463{ 4792{
@@ -4995,7 +5324,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
4995 5324
4996const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 5325const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
4997 .early_init = gfx_v8_0_early_init, 5326 .early_init = gfx_v8_0_early_init,
4998 .late_init = NULL, 5327 .late_init = gfx_v8_0_late_init,
4999 .sw_init = gfx_v8_0_sw_init, 5328 .sw_init = gfx_v8_0_sw_init,
5000 .sw_fini = gfx_v8_0_sw_fini, 5329 .sw_fini = gfx_v8_0_sw_fini,
5001 .hw_init = gfx_v8_0_hw_init, 5330 .hw_init = gfx_v8_0_hw_init,