aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Cornwall <Jay.Cornwall@amd.com> 2018-05-01 17:56:02 -0400
committer Oded Gabbay <oded.gabbay@gmail.com> 2018-05-01 17:56:02 -0400
commit 87e6d4e0777daf774ed9aa59ed25b6ebaaad7052 (patch)
tree 1062fa04a237eef7021afd3401bea92ddb5015f3
parent 24f48a42038f5baaae49b181b64782ecfb703a9c (diff)
drm/amdkfd: Reduce priority of context-saving waves before spin-wait
Synchronization between context-saving wavefronts is achieved by sending a SAVEWAVE message to the SPI and then spin-waiting for a response. These spin-waiting wavefronts may inhibit the progress of other wavefronts in the context save handler, leading to the synchronization condition never being achieved. Before spin-waiting, reduce the priority of each wavefront to guarantee forward progress in the others. Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 10
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 8
2 files changed, 15 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383dcb8b..34eabcdd27a0 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -98,6 +98,7 @@ var SWIZZLE_EN = 0 //whether we use swi
98/**************************************************************************/ 98/**************************************************************************/
99var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 99var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
100var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 100var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
101var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
101var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 102var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
102 103
103var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 104var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -319,6 +320,10 @@ end
319 s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC 320 s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
320 end 321 end
321 322
323 // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
324 s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
325 s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
326
322 L_SLEEP: 327 L_SLEEP:
323 s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 328 s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
324 329
@@ -1132,7 +1137,7 @@ end
1132#endif 1137#endif
1133 1138
1134static const uint32_t cwsr_trap_gfx8_hex[] = { 1139static const uint32_t cwsr_trap_gfx8_hex[] = {
1135 0xbf820001, 0xbf820123, 1140 0xbf820001, 0xbf820125,
1136 0xb8f4f802, 0x89748674, 1141 0xb8f4f802, 0x89748674,
1137 0xb8f5f803, 0x8675ff75, 1142 0xb8f5f803, 0x8675ff75,
1138 0x00000400, 0xbf850011, 1143 0x00000400, 0xbf850011,
@@ -1158,7 +1163,8 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
1158 0x867aff7a, 0x00007fff, 1163 0x867aff7a, 0x00007fff,
1159 0xb97af807, 0xbef2007e, 1164 0xb97af807, 0xbef2007e,
1160 0xbef3007f, 0xbefe0180, 1165 0xbef3007f, 0xbefe0180,
1161 0xbf900004, 0xbf8e0002, 1166 0xbf900004, 0x877a8474,
1167 0xb97af802, 0xbf8e0002,
1162 0xbf88fffe, 0xbef8007e, 1168 0xbf88fffe, 0xbef8007e,
1163 0x8679ff7f, 0x0000ffff, 1169 0x8679ff7f, 0x0000ffff,
1164 0x8779ff79, 0x00040000, 1170 0x8779ff79, 0x00040000,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 033580c997ea..cac8d4992e04 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -97,6 +97,7 @@ var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing
97/**************************************************************************/ 97/**************************************************************************/
98var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 98var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
99var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 99var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
100var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
100var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 101var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
101var SQ_WAVE_STATUS_HALT_MASK = 0x2000 102var SQ_WAVE_STATUS_HALT_MASK = 0x2000
102 103
@@ -362,6 +363,10 @@ end
362 s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC 363 s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
363 end 364 end
364 365
366 // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
367 s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
368 s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
369
365 L_SLEEP: 370 L_SLEEP:
366 s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 371 s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
367 372
@@ -1210,7 +1215,7 @@ end
1210#endif 1215#endif
1211 1216
1212static const uint32_t cwsr_trap_gfx9_hex[] = { 1217static const uint32_t cwsr_trap_gfx9_hex[] = {
1213 0xbf820001, 0xbf820158, 1218 0xbf820001, 0xbf82015a,
1214 0xb8f8f802, 0x89788678, 1219 0xb8f8f802, 0x89788678,
1215 0xb8f1f803, 0x866eff71, 1220 0xb8f1f803, 0x866eff71,
1216 0x00000400, 0xbf850034, 1221 0x00000400, 0xbf850034,
@@ -1249,6 +1254,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
1249 0x00007fff, 0xb970f807, 1254 0x00007fff, 0xb970f807,
1250 0xbeee007e, 0xbeef007f, 1255 0xbeee007e, 0xbeef007f,
1251 0xbefe0180, 0xbf900004, 1256 0xbefe0180, 0xbf900004,
1257 0x87708478, 0xb970f802,
1252 0xbf8e0002, 0xbf88fffe, 1258 0xbf8e0002, 0xbf88fffe,
1253 0xb8f02a05, 0x80708170, 1259 0xb8f02a05, 0x80708170,
1254 0x8e708a70, 0xb8f11605, 1260 0x8e708a70, 0xb8f11605,